To run: Beforehand:

module load pandoc

In R:

setwd("~/shared-gandalm/brain_CTP/Scripts/methylation/analysis/combine_methods/ctp_validation")
rmarkdown::render("ctp_validation_Wong2018_ASDmethylation.Rmd", "html_document")

1 Set-up

1.1 Directories and libraries

library(tidyverse)
library(rmarkdown)    # You need this library to run this template.
library(epuRate)      # Install with remotes::install_github("holtzy/epuRate", force=TRUE) - use this instead of devtools::install_github()
library(data.table)
library(DT)
library(tidyverse)
library(dplyr)
library(reshape2)
library(uwot) # for umap
library(methylCC)
library(lme4)
library(compositions)
library(zCompositions)
library(mixOmics)
source("~/project-gandalm/software/ANCOM-v2.1/scripts/ancom_v2.1.R")
library(ALDEx2)
library(factoextra)
library(ggplot2)
library(RColorBrewer)
library(viridis)
library(ggpubr)
library(GGally)
library(lattice)
library(gridExtra)

# BiocManager::install("M3C")
# library(M3C)
filen <- "KCL_R01MH094714_ASD_Illumina450K_PFC"
out_dir <- "~/shared-gandalm/brain_CTP/Data/methylation/ASD_methylation_brain/analysis"

# Cell-type number
cellnum <- 9
cellnum <- 7

write_out = TRUE

# Read in reference object
# - use chisq statistic, 5 cell-types, bonferroni p<0.001, marker contributing >=50% of chisq stat, take top 200
#ref_dir <- "~/shared-gandalm/brain_CTP/Data/reference_cell_profile/Luo2020/Luo2020_chisq_dmr_cell5_bonf001_contr50pc_top200.rds"
# - use extreme_methylation_sites.R output
#ref_dir <- "~/shared-gandalm/brain_CTP/Data/reference_cell_profile/Luo2020/Luo2020_extreme17_dmr_low0.3_high0.7.rds"
# - use chisq statistic, 5 cell-types, bonferroni p<0.001, marker contributing >=50% of chisq stat, take top 200
#ref_dir <- "~/shared-gandalm/brain_CTP/Data/reference_cell_profile/Luo2020/Luo2020_chisq_dmr_cell5_bonf001_contr50pc_top200.rds"
#ref_dir <- "~/shared-gandalm/brain_CTP/Data/reference_cell_profile/Luo2020/Luo2020_chisq_dmr_ilmn450kepic_aggto100bp_cell7_bonf001_contr50pc_top200.rds"
#ref <- readRDS(ref_dir)

# Pheno
pheno_dir <- "~/shared-gandalm/brain_CTP/Data/pheno/ASD_methylation_brain/Wong_ASD_Brain_Methylation_SuppTable1_pheno.csv"

# methylCC
# - use Luo et al. reference with DMRs identified from 450K overlap 
#saveRDS(methylcc_out, paste(out_dir, "/", filen, "_aut_mask_methylcc_out_n200_p1e-6.rds", sep = ""))
# - use 450K dlpfc guintivano reference (+/- randomisation of sample order)
#saveRDS(methylcc_out, paste(out_dir, "/", filen, "_aut_mask_methylcc_out_n200_dlpfc_450k_guintivano_hpc.rds", sep = ""))
# - use Luo et al. reference with DMRs pre-identified by Luo et al.
#saveRDS(methylcc_out, paste(out_dir, "/", filen, "_aut_mask_predmr_ilmn450k_celltype_na5_methylcc_dmr_n200_p1e_1.rds", sep = ""))
# - use extreme_methylation_sites.R output
#saveRDS(methylcc_out, paste(out_dir, "/", filen, "_aut_mask_extreme17_dmr_low0.3_high0.7.rds", sep = ""))
# - use chisq statistic, 5 cell-types, bonferroni p<0.001, marker contributing >=50% of chisq stat, take top 200
#saveRDS(methylcc_out, paste(out_dir, "/", filen, "_aut_mask_chisq_dmr_cell5_bonf001_contr50pc_top200.rds", sep = ""))
#methylcc_dir <- paste(out_dir, "/", filen, "_aut_mask_chisq_dmr_cell5_bonf001_contr50pc_top200.rds", sep = "")
filen0 <- "KCL_R01MH094714_ASD_Illumina450K_PFC"
# methylcc_dir <- paste(out_dir, "/", filen0, "_aut_mask_chisq_dmr_cell5_bonf001_contr50pc_top200_aggpostmcc.txt", sep = "")
#methylcc_dir <- paste(out_dir, "/", filen0, "_aut_mask_chisq_dmr_ilmn450kepic_aggto100bp_cell7_bonf001_contr50pc_top200_aggpostmcc.txt", sep = "")

if (cellnum == 9) {
  methylcc_dir <- paste(out_dir, "/", filen0, "_aut_mask_extremes_dmr_ilmn450kepic_aggto100bp_c10_cell9_split6040all_aggpostmcc.txt", sep = "")
} else if (cellnum == 7) {
  methylcc_dir <- paste(out_dir, "/", filen0, "_aut_mask_extremes_dmr_ilmn450kepic_aggto100bp_c10_cell7_split6040all_aggpostmcc.txt", sep = "")
}

# methylCC with NeuN+/- data (Guintivano DLPFC)
methylcc_neun_dir <- "~/shared-gandalm/brain_CTP/Data/methylation/ASD_methylation_brain/analysis/KCL_R01MH094714_ASD_Illumina450K_PFC_aut_mask_methylcc_out_n200_dlpfc_450k_guintivano_hpc.rds"

# eBayes + Houseman
houseman_dir <- "~/shared-gandalm/brain_CTP/Data/methylation/ASD_methylation_brain/analysis/KCL_R01MH094714_ASD_Illumina450K_PFC_ref450K_toast_houseman_ls.rds"

# sequencing with extremes + Houseman
houseman_seq_dir <- "~/shared-gandalm/brain_CTP/Data/methylation/ASD_methylation_brain/analysis/KCL_R01MH094714_ASD_Illumina450K_PFC_Luo2020_extremes_dmr_ilmn450kepic_aggto100bp_c10_cell7_split6040all_houseman_ls.rds"

# celfie
celfie_dir <-  "~/shared-gandalm/brain_CTP/Data/methylation/ASD_methylation_brain/analysis/celfie/KCL_R01MH094714_ASD_Illumina450K_PFC_celfie.out/1_tissue_proportions.txt"
celfie_label_dir <- "~/shared-gandalm/brain_CTP/Data/methylation/ASD_methylation_brain/analysis/celfie/KCL_R01MH094714_ASD_Illumina450K_PFC_celfie.in"

# ReFACToR
#refactor_dir <- "~/shared-gandalm/brain_CTP/Data/methylation/ASD_methylation_brain/analysis/KCL_R01MH094714_ASD_Illumina450K_PFC_k8_cov.refactor.components.txt"

# smartSV
smartsva_dir <- "~/shared-gandalm/brain_CTP/Data/methylation/ASD_methylation_brain/analysis/ASD_methylation_brain_SmartSVA.rds"

# VAE/MethylNet
vae_dir <- "~/shared-gandalm/brain_CTP/Data/integration/methylnet/combined/full_output_latent.csv"

1.2 Read-in data

# Pheno
pheno <- read.csv(pheno_dir, header = T, as.is = T)
colnames(pheno)[which(colnames(pheno) == "SampleName")] <- "IID"
colnames(pheno)[which(colnames(pheno) == "Age")] <- "age"
colnames(pheno)[which(colnames(pheno) == "Sex")] <- "sex"
pheno$Batch <- as.factor(pheno$Batch)
pheno$group <- ifelse(pheno$Diagnosis == "CTL", "CTL", "ASD")

# methylCC
methylcc_ctp <- read.delim(methylcc_dir)
ind <- grep("NonN", colnames(methylcc_ctp))
foo <- colnames(methylcc_ctp)[ind]
foo2 <- unlist(lapply(strsplit(gsub("NonN_", "", foo), "_"), function(x) x[1]))
colnames(methylcc_ctp)[ind] <- foo2
colnames(methylcc_ctp)[2:ncol(methylcc_ctp)] <- paste("mcc_", colnames(methylcc_ctp)[2:ncol(methylcc_ctp)], sep = "")
# methylcc <- readRDS(methylcc_dir)
# methylcc_ctp <- cell_counts(methylcc)
# ctp_sum <- methylcc@summary
# ctp_input <- ctp_sum$input
# colSums(ctp_input$zmat)
# # Would use this to mimic Supp Fig 1
# colSums(ref$zmat) # this is the initial starting proportions

# methylCC on Guintivano DLPFC reference (NeuN+/-)
# methylcc_neun <- readRDS(methylcc_neun_dir)
# methylcc_neun_ctp <- cell_counts(methylcc_neun)
# colnames(methylcc_neun_ctp) <- c("mcc_NeuN_neg", "mcc_NeuN_pos")
# ctp_neun_sum <- methylcc_neun@summary
# ctp_neun_input <- ctp_neun_sum$input
# colSums(ctp_neun_input$zmat)

# eBayes + Houseman
houseman.ls <- readRDS(houseman_dir)
# - coefs_sub
coefs_sub <- houseman.ls$coefs_sub
colnames(coefs_sub)[2:ncol(coefs_sub)] <- paste("h_", colnames(coefs_sub)[2:ncol(coefs_sub)], sep = "")
colnames(coefs_sub)[ncol(coefs_sub)] <- "h_error_sub"
coefs_brain <- houseman.ls$coefs_brain
colnames(coefs_brain)[2:ncol(coefs_brain)] <- paste("h_", colnames(coefs_brain)[2:ncol(coefs_brain)], sep = "")
colnames(coefs_brain)[ncol(coefs_brain)] <- "h_error_brain"

# sequencing + Houseman
houseman_seq.ls <- readRDS(houseman_seq_dir)
houseman_seq <- houseman_seq.ls[[1]]
ind <- grep("NonN", colnames(houseman_seq))
foo <- colnames(houseman_seq)[ind]
foo2 <- unlist(lapply(strsplit(gsub("NonN_", "", foo), "_"), function(x) x[1]))
colnames(houseman_seq)[ind] <- foo2
colnames(houseman_seq)[2:ncol(houseman_seq)] <- paste("hseq_", colnames(houseman_seq)[2:ncol(houseman_seq)], sep = "")

# CelFIE
celfie <- fread(celfie_dir)
colnames(celfie) <- c("IID_short", "celfie_Exc", "celfie_Inh", "celfie_Astro", "celfie_Endo", "celfie_Micro", "celfie_Oligo", "celfie_OPC", "unknown1")
iid_short <- data.frame(IID = pheno$IID[which(pheno$RegionID == "BA9")])
iid_short$IID_short <- unlist(lapply(strsplit(iid_short$IID, "_"), function(x) x[1]))
celfie <- inner_join(iid_short, celfie, by = "IID_short")
#celfie_label <- fread(celfie_label_dir)

# ReFACToR
# refactor <- read.table(refactor_dir, header = T, as.is = T)
# colnames(refactor)[1] <- "IID"

# smartSVA
smartsva <- readRDS(smartsva_dir)$KCL_R01MH094714_ASD_Illumina450K_PFC
smartsva <- data.frame(IID = rownames(smartsva), smartsva)

# VAE, MethylNet
vae <- read.csv(vae_dir)
colnames(vae) <- c("IID", paste("VAEe", 0:9, sep = ""))

1.2.1 Join into 1 dataframe

#ctp_pheno <- inner_join(data.frame(IID = rownames(methylcc_ctp), methylcc_ctp), pheno, by = "IID")
methylcc_ctp.tmp <- methylcc_ctp
methylcc_ctp.tmp$mcc_Glial <- rowSums(methylcc_ctp.tmp[,-c(grep("IID", colnames(methylcc_ctp.tmp)), grep("Exc", colnames(methylcc_ctp.tmp)), grep("Inh", colnames(methylcc_ctp.tmp)))])
methylcc_ctp.tmp$mcc_Neuron <- rowSums(methylcc_ctp.tmp[,c(grep("Exc", colnames(methylcc_ctp.tmp)), grep("Inh", colnames(methylcc_ctp.tmp)))])
ctp_pheno <- inner_join(methylcc_ctp.tmp, pheno, by = "IID")
#ctp_pheno <- inner_join(ctp_pheno, data.frame(IID = rownames(methylcc_neun_ctp), methylcc_neun_ctp), by = "IID")
ctp_pheno <- inner_join(ctp_pheno, smartsva, by = "IID")

# Houseman with array reference
ctp_pheno <- inner_join(ctp_pheno, coefs_sub, by = "IID")
ctp_pheno <- inner_join(ctp_pheno, coefs_brain, by = "IID")
ctp_pheno$h_Glial <- rowSums(ctp_pheno[,c("h_ASTRO", "h_OPC", "h_OLIGO", "h_ENDO", "h_MONO")])
ctp_pheno$h_Neuron <- rowSums(ctp_pheno[,c("h_GABA", "h_GLU")])

# Houseman with sequencing reference
ctp_pheno <- inner_join(ctp_pheno, houseman_seq, by = "IID")
ctp_pheno$hseq_Glial <- rowSums(ctp_pheno[,c("hseq_Astro", "hseq_Endo", "hseq_Micro", "hseq_Oligo", "hseq_OPC")])
ctp_pheno$hseq_Neuron <- rowSums(ctp_pheno[,c("hseq_Exc", "hseq_Inh")])

# CelFIE
ctp_pheno <- inner_join(ctp_pheno, celfie, by = c("IID"))
ctp_pheno$celfie_Glial <- rowSums(ctp_pheno[,c("celfie_Astro", "celfie_Endo", "celfie_Micro", "celfie_Oligo", "celfie_OPC")])
ctp_pheno$celfie_Neuron <- rowSums(ctp_pheno[,c("celfie_Exc", "celfie_Inh")])

# Choice to exclude Oxford brain bank subset
# - choose not to as lose statistical power ...
# ctp_pheno <- ctp_pheno %>% filter(BrainBank != "Oxford")

ctp_pheno <- inner_join(ctp_pheno, vae, by = "IID")

1.2.2 Write output

if (write_out == TRUE) {
  write.table(ctp_pheno, paste(out_dir, "/", filen, "_ctp_pheno.txt", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
}

1.2.3 Set variables

keep_cols <- c("IID", "group", "age", "sex", "Batch", "PMI", "BrainBank")

ctp_cols <- c(colnames(ctp_pheno)[grep("hseq", colnames(ctp_pheno))])
ctp_cols <- ctp_cols[-which(ctp_cols == "hseq_error")]
level2_cols <- ctp_cols[-which(ctp_cols %in% c("hseq_Glial", "hseq_Neuron"))]

glial_rmoligo_n <- colnames(ctp_pheno)[grep("hseq", colnames(ctp_pheno))]
glial_rmoligo_n <- glial_rmoligo_n[-which(glial_rmoligo_n %in% c("hseq_Exc", "hseq_Inh", "hseq_Oligo", "hseq_error", "hseq_Glial", "hseq_Neuron"))]
neuron_n <- c("hseq_Exc", "hseq_Inh")

2 Check baseline variable differences

  • look for confounding
  • there is confounding for age, brain bank, batch (when looking at the PFC samples only)
  • this means that accounting for covariates might also take out the ASD signal?!
# BrainBank
brainbank <- ctp_pheno %>% group_by(group, BrainBank) %>% summarise(n=n()) %>% spread(group, n)
## `summarise()` has grouped output by 'group'. You can override using the `.groups` argument.
brainbank
## # A tibble: 3 × 3
##   BrainBank   ASD   CTL
##   <chr>     <int> <int>
## 1 ATP          30    13
## 2 NICHD         8     6
## 3 Oxford        4    14
chisq.test(brainbank %>% dplyr::select("ASD", "CTL"))
## 
##  Pearson's Chi-squared test
## 
## data:  brainbank %>% dplyr::select("ASD", "CTL")
## X-squared = 11.65, df = 2, p-value = 0.002953
# Batch
batch <- ctp_pheno %>% group_by(group, Batch) %>% summarise(n=n()) %>% spread(group, n)
## `summarise()` has grouped output by 'group'. You can override using the `.groups` argument.
batch
## # A tibble: 2 × 3
##   Batch   ASD   CTL
##   <fct> <int> <int>
## 1 1        35    33
## 2 2         7    NA
#chisq.test(batch %>% dplyr::select("ASD", "CTL"))

# Sex
sex <- ctp_pheno %>% group_by(group, sex) %>% summarise(n=n()) %>% spread(group, n)
## `summarise()` has grouped output by 'group'. You can override using the `.groups` argument.
sex
## # A tibble: 2 × 3
##   sex     ASD   CTL
##   <chr> <int> <int>
## 1 F         8     7
## 2 M        34    26
chisq.test(sex %>% dplyr::select("ASD", "CTL"))
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  sex %>% dplyr::select("ASD", "CTL")
## X-squared = 8.5375e-31, df = 1, p-value = 1
# Age
summary(lm(age ~ group, data = ctp_pheno))
## 
## Call:
## lm(formula = age ~ group, data = ctp_pheno)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -32.515 -15.817  -0.515  14.985  39.881 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   27.119      2.757   9.835 5.16e-15 ***
## groupCTL      20.396      4.157   4.906 5.47e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 17.87 on 73 degrees of freedom
## Multiple R-squared:  0.248,  Adjusted R-squared:  0.2377 
## F-statistic: 24.07 on 1 and 73 DF,  p-value: 5.466e-06
ggplot(ctp_pheno, aes(x = age, colour = group, fill = group)) + geom_histogram() + facet_wrap(~group)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# BrainBank x age x group
ctp_pheno %>% group_by(BrainBank, group) %>% summarise(mean = mean(age), sd = sd(age))
## `summarise()` has grouped output by 'BrainBank'. You can override using the `.groups` argument.
## # A tibble: 6 × 4
## # Groups:   BrainBank [3]
##   BrainBank group  mean    sd
##   <chr>     <chr> <dbl> <dbl>
## 1 ATP       ASD    25.3 16.6 
## 2 ATP       CTL    36.9 13.9 
## 3 NICHD     ASD    28.8 21.3 
## 4 NICHD     CTL    31.8 15.4 
## 5 Oxford    ASD    37.2 12.4 
## 6 Oxford    CTL    64.1  8.12

3 Check association of sSVs and batch effects

tmp.long <- ctp_pheno %>% dplyr::select(c("IID", "BrainBank", "Batch", "sex", "age", "group", "hseq_Glial", "sSV1", "sSV2", "sSV3", "sSV4", "sSV5", "sSV6", "sSV7", "sSV8")) %>% melt(id.vars = c("IID", "BrainBank", "Batch", "sex", "age", "group", "hseq_Glial"), variable.name = "sSV_num", value.name = "sSV")

ggplot(tmp.long, aes(x = BrainBank, y = sSV)) + geom_boxplot() + facet_wrap(~ sSV_num, scales = "free_y")

ggplot(tmp.long, aes(x = Batch, y = sSV, colour = Batch)) + geom_boxplot() + facet_wrap(~ sSV_num, scales = "free_y")

ggplot(tmp.long, aes(x = Batch, y = sSV, colour = BrainBank)) + geom_boxplot() + facet_wrap(~ sSV_num, scales = "free_y")

ggplot(tmp.long, aes(x = group, y = sSV, colour = group)) + geom_boxplot() + facet_wrap(~ sSV_num, scales = "free_y")

ggplot(tmp.long, aes(x = sex, y = sSV)) + geom_boxplot() + facet_wrap(~ sSV_num)

ggplot(tmp.long, aes(x = age, y = sSV, colour = BrainBank)) + geom_point() + geom_smooth(method = "lm") + facet_wrap(~ sSV_num)
## `geom_smooth()` using formula 'y ~ x'

ggplot(tmp.long, aes(x = hseq_Glial, y = sSV, colour = BrainBank)) + geom_point() + geom_smooth(method = "lm") + facet_wrap(~ sSV_num)
## `geom_smooth()` using formula 'y ~ x'

4 Check association of VAE embeddings and batch effects

tmp.long <- ctp_pheno %>% dplyr::select(c("IID", "BrainBank", "Batch", "sex", "age", "group", "hseq_Glial", "VAEe0", "VAEe1", "VAEe2", "VAEe3", "VAEe4", "VAEe5", "VAEe6", "VAEe7", "VAEe8", "VAEe9")) %>% melt(id.vars = c("IID", "BrainBank", "Batch", "sex", "age", "group", "hseq_Glial"), variable.name = "VAEe", value.name = "embedding")

ggplot(tmp.long, aes(x = BrainBank, y = embedding)) + geom_boxplot() + facet_wrap(~ VAEe, scales = "free_y")

ggplot(tmp.long, aes(x = Batch, y = embedding, colour = Batch)) + geom_boxplot() + facet_wrap(~ VAEe, scales = "free_y")

ggplot(tmp.long, aes(x = Batch, y = embedding, colour = BrainBank)) + geom_boxplot() + facet_wrap(~ VAEe, scales = "free_y")

ggplot(tmp.long, aes(x = group, y = embedding, colour = group)) + geom_boxplot() + facet_wrap(~ VAEe, scales = "free_y")

ggplot(tmp.long, aes(x = sex, y = embedding)) + geom_boxplot() + facet_wrap(~ VAEe)

ggplot(tmp.long, aes(x = age, y = embedding, colour = BrainBank)) + geom_point() + geom_smooth(method = "lm") + facet_wrap(~ VAEe)
## `geom_smooth()` using formula 'y ~ x'

ggplot(tmp.long, aes(x = hseq_Glial, y = embedding, colour = BrainBank)) + geom_point() + geom_smooth(method = "lm") + facet_wrap(~ VAEe)
## `geom_smooth()` using formula 'y ~ x'

5 Boxplots - raw

Add analysis info on

houseman_seq.long <- melt(houseman_seq, variable.name = "celltype", value.name = "seq_houseman_CTP")
## Using IID as id variables
houseman_seq.gg <- ggplot(houseman_seq.long, aes(x = celltype, y = seq_houseman_CTP, colour = celltype)) + geom_boxplot() + theme(legend.position = "none", axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) + ylim(c(0,1))

methylcc_ctp.long <- melt(methylcc_ctp, variable.name = "celltype", value.name = "methylcc_CTP")
## Using IID as id variables
methylcc_ctp.gg <- ggplot(methylcc_ctp.long, aes(x = celltype, y = methylcc_CTP, colour = celltype)) + geom_boxplot() + theme(legend.position = "none", axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) + ylim(c(0,1))
#+ theme(legend.position = "none", axis.text.x = element_text(angle = 45, hjust = 1))

coefs_sub.long <- melt(coefs_sub, variable.name = "celltype", value.name = "eBayes_houseman_CTP") %>% mutate(MatchCell = match(celltype, c("h_GLU", "h_GABA", "h_ASTRO", "h_ENDO", "h_MONO", "h_OLIGO", "h_OPC")))
## Using IID as id variables
coefs_sub.gg <- coefs_sub.long %>% mutate(celltype = fct_reorder(celltype, MatchCell)) %>% ggplot(aes(x = celltype, y = eBayes_houseman_CTP, colour = celltype)) + geom_boxplot() + theme(legend.position = "none", axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) + ylim(c(0,1))

coefs_brain.long <- melt(coefs_brain, variable.name = "celltype", value.name = "eBayes_houseman_CTP")
## Using IID as id variables
coefs_brain.gg <- ggplot(coefs_brain.long, aes(x = celltype, y = eBayes_houseman_CTP, colour = celltype)) + geom_boxplot() + theme(legend.position = "none", axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) + ylim(c(0,1))

smartsva.long <- melt(smartsva, variable.name = "celltype", value.name = "smartsva_CTP")
## Using IID as id variables
smartsva.gg <- ggplot(smartsva.long, aes(x = celltype, y = smartsva_CTP, colour = celltype)) + geom_boxplot()  + theme(legend.position = "none", axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))

vae.long <- melt(vae, id.vars = c("IID"), variable.name = "celltype", value.name = "VAE_embed")
vae.gg <- ggplot(vae.long, aes(x = celltype, y = VAE_embed, colour = celltype)) + geom_boxplot() + theme(legend.position = "none", axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) + ylim(c(0,1))

celfie.long <- melt(celfie, id.vars = c("IID", "IID_short"), variable.name = "celltype", value.name = "CelFIE_CTP")
celfie.gg <- ggplot(celfie.long, aes(x = celltype, y = CelFIE_CTP, colour = celltype)) + geom_boxplot() + theme(legend.position = "none", axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) + ylim(c(0,1))

ggarrange(houseman_seq.gg, methylcc_ctp.gg, coefs_sub.gg, coefs_brain.gg, celfie.gg, smartsva.gg, nrow = 3, ncol = 2)

5.1 houseman_seq

# - boxplot
hseq_ctp_pheno.tmp <- ctp_pheno %>% dplyr::select(c(all_of(keep_cols), all_of(ctp_cols)))

if (write_out == TRUE) {
  write.table(hseq_ctp_pheno.tmp, paste(out_dir, "/hseq_ctp_pheno.raw.txt", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
}
hseq_ctp_pheno.long <- melt(hseq_ctp_pheno.tmp, measure.vars = c(all_of(ctp_cols)), variable.name = "celltype", value.name = "CTP")
hseq_ctp_pheno.long$Level <- ifelse(hseq_ctp_pheno.long$celltype %in% c("hseq_Neuron", "hseq_Glial"), "Level1", "Level2")
hseq_ctp_pheno.long$Region <- "PFC"
hseq_ctp_pheno.long$celltype <- gsub("hseq_", "", hseq_ctp_pheno.long$celltype)
hseq_ctp_pheno.long$celltype <- unlist(lapply(strsplit(hseq_ctp_pheno.long$celltype, "_"), function(x) x[1]))
hseq_ctp_pheno.long$MatchCell <- match(hseq_ctp_pheno.long$celltype, rev(c("Exc", "Inh", "Astro", "Micro", "Endo", "Oligo", "OPC", "Neuron", "Glial")))
hseq_ctp_pheno.long %>% mutate(celltype = fct_reorder(celltype, MatchCell)) %>%
  ggplot(aes(x=celltype, y = CTP, fill = group)) + 
  geom_boxplot(position = position_dodge(.8),alpha=.8,outlier.size = 0)+ 
  geom_point(position = position_jitterdodge(.1),alpha=.3) +   
    facet_grid(Level~Region, scales='free', space = 'free_y') + coord_flip() + theme_bw() + xlab("")

5.1.1 Adjust for oligodendrocyte proportion

# New adjustment which only changes neuronal populations
hseq_ctp_pheno_adjoligo.tmp <- hseq_ctp_pheno.tmp %>%
  mutate(sum_neuro_oligo = hseq_Exc + hseq_Inh + hseq_Oligo) %>%
  mutate(Exc_Inh_ratio = (hseq_Exc/(hseq_Exc + hseq_Inh))) %>%
  mutate(Inh_Exc_ratio = (hseq_Inh/(hseq_Exc + hseq_Inh))) %>%
  mutate(hseq_Exc = Exc_Inh_ratio * sum_neuro_oligo) %>%
  mutate(hseq_Inh = Inh_Exc_ratio * sum_neuro_oligo) %>%
  dplyr::select(-c("hseq_Oligo", "hseq_Neuron", "hseq_Glial"))

# Old adjustment which changed ALL CTPs
# methylcc_ctp_pheno_adjoligo.tmp$sum_nonoligo <- methylcc_ctp_pheno_adjoligo.tmp$Exc + methylcc_ctp_pheno_adjoligo.tmp $Inh + methylcc_ctp_pheno_adjoligo.tmp$NonN_Astro_FGF3R + methylcc_ctp_pheno_adjoligo.tmp$NonN_Micro.Endo_TYROBP
# methylcc_ctp_pheno_adjoligo.tmp$Exc <- methylcc_ctp_pheno_adjoligo.tmp$Exc/methylcc_ctp_pheno_adjoligo.tmp$sum_nonoligo
# methylcc_ctp_pheno_adjoligo.tmp$Inh <- methylcc_ctp_pheno_adjoligo.tmp$Inh/methylcc_ctp_pheno_adjoligo.tmp$sum_nonoligo
# methylcc_ctp_pheno_adjoligo.tmp$NonN_Astro_FGF3R <- methylcc_ctp_pheno_adjoligo.tmp$NonN_Astro_FGF3R/methylcc_ctp_pheno_adjoligo.tmp$sum_nonoligo
# methylcc_ctp_pheno_adjoligo.tmp$NonN_Micro.Endo_TYROBP <- methylcc_ctp_pheno_adjoligo.tmp$NonN_Micro.Endo_TYROBP/methylcc_ctp_pheno_adjoligo.tmp$sum_nonoligo

hseq_ctp_pheno_adjoligo.tmp$hseq_Neuron <- rowSums(hseq_ctp_pheno_adjoligo.tmp[,neuron_n])
hseq_ctp_pheno_adjoligo.tmp$hseq_Glial <- rowSums(hseq_ctp_pheno_adjoligo.tmp[,glial_rmoligo_n])

if (write_out == TRUE) {
  write.table(hseq_ctp_pheno_adjoligo.tmp, paste(out_dir, "/hseq_adjoligo_ctp_pheno.raw.txt", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
}
hseq_ctp_pheno_adjoligo.long <- melt(hseq_ctp_pheno_adjoligo.tmp, measure.vars = c(all_of(neuron_n), all_of(glial_rmoligo_n), "hseq_Neuron", "hseq_Glial"), variable.name = "celltype", value.name = "CTP")
hseq_ctp_pheno_adjoligo.long$Level <- ifelse(hseq_ctp_pheno_adjoligo.long$celltype %in% c("hseq_Neuron", "hseq_Glial"), "Level1", "Level2")
hseq_ctp_pheno_adjoligo.long$Region <- "PFC"
hseq_ctp_pheno_adjoligo.long$celltype <- gsub("hseq_", "", hseq_ctp_pheno_adjoligo.long$celltype)

hseq_ctp_pheno_adjoligo.long %>% mutate(celltype = fct_reorder(celltype, match(celltype, rev(c("Exc", "Inh", "Astro", "Endo", "Micro", "OPC"))))) %>%
  ggplot(aes(x=celltype, y = CTP, fill = group)) + 
  geom_boxplot(position = position_dodge(.8),alpha=.8,outlier.size = 0)+ 
  geom_point(position = position_jitterdodge(.1),alpha=.3) +   
    facet_grid(Level~Region, scales='free', space = 'free_y') + coord_flip() + theme_bw() + xlab("")

5.2 methylCC

ctp_cols_mcc <- gsub("hseq", "mcc", ctp_cols)
neuron_n_mcc <- gsub("hseq", "mcc", neuron_n)
glial_rmoligo_n_mcc <- gsub("hseq", "mcc", glial_rmoligo_n)

# - boxplot
methylcc_ctp_pheno.tmp <- ctp_pheno %>% dplyr::select(c(all_of(keep_cols), all_of(ctp_cols_mcc)))

if (write_out == TRUE) {
  write.table(methylcc_ctp_pheno.tmp, paste(out_dir, "/methylcc_ctp_pheno.raw.txt", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
}
methylcc_ctp_pheno.long <- melt(methylcc_ctp_pheno.tmp, measure.vars = c(all_of(ctp_cols_mcc)), variable.name = "celltype", value.name = "CTP")
methylcc_ctp_pheno.long$Level <- ifelse(methylcc_ctp_pheno.long$celltype %in% c("mcc_Neuron", "mcc_Glial"), "Level1", "Level2")
methylcc_ctp_pheno.long$Region <- "PFC"
methylcc_ctp_pheno.long$celltype <- gsub("mcc_", "", methylcc_ctp_pheno.long$celltype)

methylcc_ctp_pheno.long %>% mutate(celltype = fct_reorder(celltype, match(celltype, rev(c("Exc", "Inh", "Astro", "Endo", "Micro", "Oligo", "OPC"))))) %>%
ggplot(aes(x=celltype, y = CTP, fill = group)) + 
  geom_boxplot(position = position_dodge(.8),alpha=.8,outlier.size = 0)+ 
  geom_point(position = position_jitterdodge(.1),alpha=.3) +   
    facet_grid(Level~Region, scales='free', space = 'free_y') + coord_flip() + theme_bw() + xlab("")

5.2.1 Adjust for oligodendrocyte proportion

# New adjustment which only changes neuronal populations
methylcc_ctp_pheno_adjoligo.tmp <- methylcc_ctp_pheno.tmp %>%
  mutate(sum_neuro_oligo = mcc_Exc + mcc_Inh + mcc_Oligo) %>%
  mutate(Exc_Inh_ratio = (mcc_Exc/(mcc_Exc +mcc_Inh))) %>%
  mutate(Inh_Exc_ratio = (mcc_Inh/(mcc_Exc + mcc_Inh))) %>%
  mutate(mcc_Exc = Exc_Inh_ratio * sum_neuro_oligo) %>%
  mutate(mcc_Inh = Inh_Exc_ratio * sum_neuro_oligo) %>%
  dplyr::select(-c("mcc_Oligo", "mcc_Neuron", "mcc_Glial"))

# Old adjustment which changed ALL CTPs
# methylcc_ctp_pheno_adjoligo.tmp$sum_nonoligo <- methylcc_ctp_pheno_adjoligo.tmp$Exc + methylcc_ctp_pheno_adjoligo.tmp $Inh + methylcc_ctp_pheno_adjoligo.tmp$NonN_Astro_FGF3R + methylcc_ctp_pheno_adjoligo.tmp$NonN_Micro.Endo_TYROBP
# methylcc_ctp_pheno_adjoligo.tmp$Exc <- methylcc_ctp_pheno_adjoligo.tmp$Exc/methylcc_ctp_pheno_adjoligo.tmp$sum_nonoligo
# methylcc_ctp_pheno_adjoligo.tmp$Inh <- methylcc_ctp_pheno_adjoligo.tmp$Inh/methylcc_ctp_pheno_adjoligo.tmp$sum_nonoligo
# methylcc_ctp_pheno_adjoligo.tmp$NonN_Astro_FGF3R <- methylcc_ctp_pheno_adjoligo.tmp$NonN_Astro_FGF3R/methylcc_ctp_pheno_adjoligo.tmp$sum_nonoligo
# methylcc_ctp_pheno_adjoligo.tmp$NonN_Micro.Endo_TYROBP <- methylcc_ctp_pheno_adjoligo.tmp$NonN_Micro.Endo_TYROBP/methylcc_ctp_pheno_adjoligo.tmp$sum_nonoligo

methylcc_ctp_pheno_adjoligo.tmp$mcc_Neuron <- rowSums(methylcc_ctp_pheno_adjoligo.tmp[,neuron_n_mcc])
methylcc_ctp_pheno_adjoligo.tmp$mcc_Glial <- rowSums(methylcc_ctp_pheno_adjoligo.tmp[,glial_rmoligo_n_mcc])

if (write_out == TRUE) {
  write.table(methylcc_ctp_pheno_adjoligo.tmp, paste(out_dir, "/methylcc_adjoligo_ctp_pheno.raw.txt", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
}
methylcc_ctp_pheno_adjoligo.long <- melt(methylcc_ctp_pheno_adjoligo.tmp, measure.vars = c(all_of(neuron_n_mcc), all_of(glial_rmoligo_n_mcc), "mcc_Neuron", "mcc_Glial"), variable.name = "celltype", value.name = "CTP")
methylcc_ctp_pheno_adjoligo.long$Level <- ifelse(methylcc_ctp_pheno_adjoligo.long$celltype %in% c("mcc_Neuron", "mcc_Glial"), "Level1", "Level2")
methylcc_ctp_pheno_adjoligo.long$Region <- "PFC"
methylcc_ctp_pheno_adjoligo.long$celltype <- gsub("mcc_", "", methylcc_ctp_pheno_adjoligo.long$celltype)

methylcc_ctp_pheno_adjoligo.long %>% mutate(celltype = fct_reorder(celltype, match(celltype, rev(c("Exc", "Inh", "Astro", "Endo", "Micro", "OPC"))))) %>%
ggplot(aes(x=celltype, y = CTP, fill = group)) + 
  geom_boxplot(position = position_dodge(.8),alpha=.8,outlier.size = 0)+ 
  geom_point(position = position_jitterdodge(.1),alpha=.3) +   
    facet_grid(Level~Region, scales='free', space = 'free_y') + coord_flip() + theme_bw() + xlab("")

5.3 coefs_sub

# - boxplot
coefs_sub_ctp_pheno.tmp <- ctp_pheno %>% dplyr::select(c(all_of(keep_cols), "h_GLU", "h_GABA", "h_ASTRO", "h_ENDO", "h_MONO", "h_OPC", "h_OLIGO"))
coefs_sub_ctp_pheno.tmp$neuron_sum <- coefs_sub_ctp_pheno.tmp$h_GABA + coefs_sub_ctp_pheno.tmp$h_GLU
coefs_sub_ctp_pheno.tmp$glia_sum <- coefs_sub_ctp_pheno.tmp$h_ASTRO + coefs_sub_ctp_pheno.tmp$h_OPC + coefs_sub_ctp_pheno.tmp$h_OLIGO + coefs_sub_ctp_pheno.tmp$h_ENDO + coefs_sub_ctp_pheno.tmp$h_MONO

if (write_out == TRUE) {
  write.table(coefs_sub_ctp_pheno.tmp, paste(out_dir, "/coefs_sub_ctp_pheno.raw.txt", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
}
coefs_sub_ctp_pheno.long <- melt(coefs_sub_ctp_pheno.tmp, measure.vars = c("h_GLU", "h_GABA", "h_ASTRO", "h_ENDO", "h_MONO", "h_OPC", "h_OLIGO", "neuron_sum", "glia_sum"), variable.name = "celltype", value.name = "CTP")
coefs_sub_ctp_pheno.long$Level <- ifelse(coefs_sub_ctp_pheno.long$celltype %in% c("neuron_sum", "glia_sum"), "Level1", "Level2")
coefs_sub_ctp_pheno.long$Region <- "PFC"

ggplot(coefs_sub_ctp_pheno.long, aes(x=celltype, y = CTP, fill = group)) + 
  geom_boxplot(position = position_dodge(.8),alpha=.8,outlier.size = 0)+ 
  geom_point(position = position_jitterdodge(.1),alpha=.3) +   
    facet_grid(Level~Region, scales='free', space = 'free_y') + coord_flip() + theme_bw() + xlab("")

6 Boxplots: clr-transformation (2 ways to handle zeros)

  • Justification: does it make sense to run an ANOVA on CTP ~ group, if the CTPs are related?
  • Eg. what does variance analysis look like when the y-variable for each individual is non-independent?

6.1 houseman_seq

6.1.1 clr-transformation with z-compositions to deal with zeros

  • IDs go as columns, CTPs as rows for zCompositions cmultRepl(), and IDs as rows for the clr(?
keep_pheno_col <- hseq_ctp_pheno.tmp %>% dplyr::select(all_of(keep_cols))
hseq_ctp_pheno.tmp.clr <- as.matrix(hseq_ctp_pheno.tmp %>% dplyr::select(c(all_of(level2_cols))))

# Apply zCompositions to empirically get the best replacement of zeros
# - it looks like you have to transpose the matrix (with samples as columns) to do this properly (?)
length(which(hseq_ctp_pheno.tmp.clr == 0)) # check there are no 0s
## [1] 0
check <- which(hseq_ctp_pheno.tmp.clr == 0, arr.ind = T)

hseq_ctp_pheno.tmp.clr_t <- t(hseq_ctp_pheno.tmp.clr)

if (length(which(hseq_ctp_pheno.tmp.clr == 0)) > 0) {
  
  check <- which(hseq_ctp_pheno.tmp.clr_t == 0, arr.ind = T)
  hseq_ctp_pheno.tmp.clr0_t <- cmultRepl(hseq_ctp_pheno.tmp.clr_t, output = "p-counts")
  hseq_ctp_pheno.tmp.clr0_t[check]

} else {
  
  print("no zcompositions applied as no 0 values")
  hseq_ctp_pheno.tmp.clr0_t <- hseq_ctp_pheno.tmp.clr_t
  
}
## [1] "no zcompositions applied as no 0 values"
# Perform clr-transform
hseq_ctp_pheno.tmp.clr0.tr <- clr(t(hseq_ctp_pheno.tmp.clr_t))
hseq_ctp_pheno.clr.zc <- data.frame(keep_pheno_col, hseq_ctp_pheno.tmp.clr0.tr)

if (write_out == TRUE) {
  write.table(hseq_ctp_pheno.clr.zc, paste(out_dir, "/hseq_ctp_pheno.clr.zc.txt", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
}

Make dataframe

hseq_ctp_pheno.clr.zc.long <- melt(hseq_ctp_pheno.clr.zc, measure.vars = c(all_of(level2_cols)), variable.name = "celltype", value.name = "CTP")
#methylcc_ctp_pheno.clr.zc.long$Level <- ifelse(methylcc_ctp_pheno.clr.zc.long$celltype %in% c("neuron_sum", "glia_sum"), "Level1", "Level2")
hseq_ctp_pheno.clr.zc.long$Region <- "PFC"
ggplot(hseq_ctp_pheno.clr.zc.long, aes(x=celltype, y = CTP, fill = group)) + 
  geom_boxplot(position = position_dodge(.8),alpha=.8,outlier.size = 0)+ 
  geom_point(position = position_jitterdodge(.1),alpha=.3) +
  coord_flip() + theme_bw() + xlab("")

  ggtitle("zCompositions")
## $title
## [1] "zCompositions"
## 
## attr(,"class")
## [1] "labels"
    #facet_grid(Region, scales='free', space = 'free_y') + coord_flip() + theme_bw() + xlab("")

6.1.2 clr-transformation with 1e-3 minimum

  • Justification: does it make sense to run an ANOVA on CTP ~ group, if the CTPs are related?

  • Eg. what does variance analysis look like when the y-variable for each individual is non-independent?

  • N.B. IDs go as columns, CTPs as rows

keep_pheno_col <- hseq_ctp_pheno.tmp %>% dplyr::select(all_of(keep_cols))
hseq_ctp_pheno.tmp.clr <- as.matrix(hseq_ctp_pheno.tmp %>% dplyr::select(c(all_of(level2_cols))))

# Make minimum CTP = 1e-3
length(which(hseq_ctp_pheno.tmp.clr <= 1e-3)) # check there are no 0s
## [1] 0
hseq_ctp_pheno.tmp.clr0 <- hseq_ctp_pheno.tmp.clr
hseq_ctp_pheno.tmp.clr0[which(hseq_ctp_pheno.tmp.clr0 <= 1e-3)] <- 1e-3

# Perform clr-transform
hseq_ctp_pheno.tmp.clr0.tr <- clr(hseq_ctp_pheno.tmp.clr0)
hseq_ctp_pheno.clr.1e3 <- data.frame(keep_pheno_col, hseq_ctp_pheno.tmp.clr0.tr)

if (write_out == TRUE) {
  write.table(hseq_ctp_pheno.clr.1e3, paste(out_dir, "/hseq_ctp_pheno.clr.1e3.txt", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
}

Make dataframe

hseq_ctp_pheno.clr.1e3.long <- melt(hseq_ctp_pheno.clr.1e3, measure.vars = c(all_of(level2_cols)), variable.name = "celltype", value.name = "CTP")
#methylcc_ctp_pheno.clr.zc.long$Level <- ifelse(methylcc_ctp_pheno.clr.zc.long$celltype %in% c("neuron_sum", "glia_sum"), "Level1", "Level2")
hseq_ctp_pheno.clr.1e3.long$Region <- "PFC"
ggplot(hseq_ctp_pheno.clr.1e3.long, aes(x=celltype, y = CTP, fill = group)) + 
  geom_boxplot(position = position_dodge(.8),alpha=.8,outlier.size = 0)+ 
  geom_point(position = position_jitterdodge(.1),alpha=.3) +
  coord_flip() + theme_bw() + xlab("") +
  ggtitle("Minimum value = 1e-3")

    #facet_grid(Region, scales='free', space = 'free_y') + coord_flip() + theme_bw() + xlab("")

6.1.2.1 Adjust for oligodendrocyte proportion

keep_pheno_col <- hseq_ctp_pheno_adjoligo.tmp %>% dplyr::select(all_of(keep_cols))
hseq_ctp_pheno_adjoligo.tmp.clr <- as.matrix(hseq_ctp_pheno_adjoligo.tmp %>% dplyr::select(c(all_of(neuron_n), all_of(glial_rmoligo_n))))

# Make minimum CTP = 1e-3
length(which(hseq_ctp_pheno_adjoligo.tmp.clr <= 1e-3)) # check there are no 0s
## [1] 0
hseq_ctp_pheno_adjoligo.tmp.clr0 <- hseq_ctp_pheno_adjoligo.tmp.clr
hseq_ctp_pheno_adjoligo.tmp.clr0[which(hseq_ctp_pheno_adjoligo.tmp.clr0 <= 1e-3)] <- 1e-3

# Perform clr-transform
hseq_ctp_pheno_adjoligo.tmp.clr0.tr <- clr(hseq_ctp_pheno_adjoligo.tmp.clr0)
hseq_ctp_pheno_adjoligo.clr.1e3 <- data.frame(keep_pheno_col, hseq_ctp_pheno_adjoligo.tmp.clr0.tr)

if (write_out == TRUE) {
  write.table(hseq_ctp_pheno_adjoligo.clr.1e3, paste(out_dir, "/hseq_adjoligo_ctp_pheno.clr.1e3.txt", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
}

Make dataframe

hseq_ctp_pheno_adjoligo.clr.1e3.long <- melt(hseq_ctp_pheno_adjoligo.clr.1e3, measure.vars = c(all_of(neuron_n), all_of(glial_rmoligo_n)), variable.name = "celltype", value.name = "CTP")
#methylcc_ctp_pheno.clr.zc.long$Level <- ifelse(methylcc_ctp_pheno.clr.zc.long$celltype %in% c("neuron_sum", "glia_sum"), "Level1", "Level2")
hseq_ctp_pheno_adjoligo.clr.1e3.long$Region <- "PFC"
ggplot(hseq_ctp_pheno_adjoligo.clr.1e3.long, aes(x=celltype, y = CTP, fill = group)) + 
  geom_boxplot(position = position_dodge(.8),alpha=.8,outlier.size = 0)+ 
  geom_point(position = position_jitterdodge(.1),alpha=.3) +
  coord_flip() + theme_bw() + xlab("") +
  ggtitle("Minimum value = 1e-3")

    #facet_grid(Region, scales='free', space = 'free_y') + coord_flip() + theme_bw() + xlab("")

7 Model: per-cell-type, compare raw

if (cellnum == 7) {
  
  summary(lm(cbind(hseq_Exc, hseq_Inh, hseq_Astro, hseq_Endo, hseq_Micro, hseq_Oligo, hseq_OPC) ~ group, data = hseq_ctp_pheno.tmp))

  summary(lm(cbind(hseq_Exc, hseq_Inh, hseq_Astro, hseq_Endo, hseq_Micro, hseq_Oligo, hseq_OPC) ~ group + age + sex + Batch + PMI, data = hseq_ctp_pheno.tmp))

} else if (cellnum == 9) {
  
  summary(lm(cbind(Exc_upper, Exc_lower, Inh_MGE, Inh_CGE, NonN_Astro_FGF3R, NonN_Endo, NonN_Micro, NonN_Oligo_MBP, NonN_OPC) ~ group, data = methylcc_ctp_pheno.tmp))

  summary(lm(cbind(Exc_upper, Exc_lower, Inh_MGE, Inh_CGE, NonN_Astro_FGF3R, NonN_Endo, NonN_Micro, NonN_Oligo_MBP, NonN_OPC) ~ group + age + sex + Batch + PMI, data = methylcc_ctp_pheno.tmp))

}
## Response hseq_Exc :
## 
## Call:
## lm(formula = hseq_Exc ~ group + age + sex + Batch + PMI, data = hseq_ctp_pheno.tmp)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.101551 -0.015886  0.007549  0.023769  0.050800 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.1772815  0.0126986  13.961   <2e-16 ***
## groupCTL     0.0129328  0.0098035   1.319   0.1915    
## age         -0.0001727  0.0002420  -0.714   0.4778    
## sexM        -0.0200699  0.0100448  -1.998   0.0497 *  
## Batch2      -0.0235835  0.0145235  -1.624   0.1090    
## PMI          0.0001538  0.0002632   0.585   0.5608    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03474 on 69 degrees of freedom
## Multiple R-squared:  0.1354, Adjusted R-squared:  0.0728 
## F-statistic: 2.162 on 5 and 69 DF,  p-value: 0.06829
## 
## 
## Response hseq_Inh :
## 
## Call:
## lm(formula = hseq_Inh ~ group + age + sex + Batch + PMI, data = hseq_ctp_pheno.tmp)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.057192 -0.015254  0.002808  0.018723  0.046966 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.641e-01  9.016e-03  18.203   <2e-16 ***
## groupCTL    -3.032e-03  6.961e-03  -0.436   0.6645    
## age         -3.679e-04  1.718e-04  -2.141   0.0358 *  
## sexM         7.765e-03  7.132e-03   1.089   0.2800    
## Batch2      -4.751e-03  1.031e-02  -0.461   0.6464    
## PMI         -1.365e-05  1.869e-04  -0.073   0.9420    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02466 on 69 degrees of freedom
## Multiple R-squared:  0.1267, Adjusted R-squared:  0.0634 
## F-statistic: 2.002 on 5 and 69 DF,  p-value: 0.08917
## 
## 
## Response hseq_Astro :
## 
## Call:
## lm(formula = hseq_Astro ~ group + age + sex + Batch + PMI, data = hseq_ctp_pheno.tmp)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.072681 -0.016376  0.001653  0.019967  0.047397 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.383e-01  9.692e-03  14.267   <2e-16 ***
## groupCTL     6.892e-04  7.482e-03   0.092    0.927    
## age          5.882e-05  1.847e-04   0.318    0.751    
## sexM        -2.909e-03  7.666e-03  -0.379    0.705    
## Batch2      -5.513e-04  1.108e-02  -0.050    0.960    
## PMI          6.450e-05  2.009e-04   0.321    0.749    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02651 on 69 degrees of freedom
## Multiple R-squared:  0.008457,   Adjusted R-squared:  -0.06339 
## F-statistic: 0.1177 on 5 and 69 DF,  p-value: 0.9881
## 
## 
## Response hseq_Endo :
## 
## Call:
## lm(formula = hseq_Endo ~ group + age + sex + Batch + PMI, data = hseq_ctp_pheno.tmp)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0104275 -0.0047421 -0.0006643  0.0037760  0.0155846 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  6.588e-02  2.328e-03  28.295   <2e-16 ***
## groupCTL    -1.155e-03  1.798e-03  -0.642   0.5228    
## age         -9.535e-05  4.438e-05  -2.149   0.0352 *  
## sexM         9.749e-04  1.842e-03   0.529   0.5983    
## Batch2       1.340e-03  2.663e-03   0.503   0.6163    
## PMI          3.533e-06  4.826e-05   0.073   0.9418    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.00637 on 69 degrees of freedom
## Multiple R-squared:  0.1321, Adjusted R-squared:  0.06923 
## F-statistic: 2.101 on 5 and 69 DF,  p-value: 0.07565
## 
## 
## Response hseq_Micro :
## 
## Call:
## lm(formula = hseq_Micro ~ group + age + sex + Batch + PMI, data = hseq_ctp_pheno.tmp)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0164782 -0.0050154 -0.0007162  0.0038776  0.0185001 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  7.508e-02  2.494e-03  30.108  < 2e-16 ***
## groupCTL    -4.345e-03  1.925e-03  -2.257   0.0272 *  
## age         -2.340e-04  4.752e-05  -4.924 5.56e-06 ***
## sexM         3.110e-03  1.972e-03   1.577   0.1194    
## Batch2       2.786e-03  2.852e-03   0.977   0.3321    
## PMI         -3.009e-05  5.168e-05  -0.582   0.5623    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.006821 on 69 degrees of freedom
## Multiple R-squared:  0.504,  Adjusted R-squared:  0.4681 
## F-statistic: 14.02 on 5 and 69 DF,  p-value: 1.867e-09
## 
## 
## Response hseq_Oligo :
## 
## Call:
## lm(formula = hseq_Oligo ~ group + age + sex + Batch + PMI, data = hseq_ctp_pheno.tmp)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.10415 -0.06463 -0.01777  0.04503  0.23677 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.2351113  0.0312857   7.515 1.54e-10 ***
## groupCTL    -0.0009078  0.0241531  -0.038    0.970    
## age          0.0007343  0.0005962   1.231    0.222    
## sexM         0.0138067  0.0247474   0.558    0.579    
## Batch2       0.0239366  0.0357817   0.669    0.506    
## PMI         -0.0002211  0.0006484  -0.341    0.734    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08559 on 69 degrees of freedom
## Multiple R-squared:  0.03759,    Adjusted R-squared:  -0.03215 
## F-statistic: 0.5389 on 5 and 69 DF,  p-value: 0.7461
## 
## 
## Response hseq_OPC :
## 
## Call:
## lm(formula = hseq_OPC ~ group + age + sex + Batch + PMI, data = hseq_ctp_pheno.tmp)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0076799 -0.0018509 -0.0002709  0.0017330  0.0165042 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4.885e-02  1.247e-03  39.175   <2e-16 ***
## groupCTL    -2.245e-03  9.626e-04  -2.333   0.0226 *  
## age          3.477e-06  2.376e-05   0.146   0.8841    
## sexM         1.186e-03  9.863e-04   1.202   0.2333    
## Batch2       1.896e-03  1.426e-03   1.330   0.1880    
## PMI          5.877e-06  2.584e-05   0.227   0.8208    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.003411 on 69 degrees of freedom
## Multiple R-squared:  0.1637, Adjusted R-squared:  0.1031 
## F-statistic: 2.701 on 5 and 69 DF,  p-value: 0.02747

7.1 Test aggregated sums

summary(lm(hseq_Glial ~ group, data = ctp_pheno))
## 
## Call:
## lm(formula = hseq_Glial ~ group, data = ctp_pheno)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.08636 -0.03848 -0.01475  0.02307  0.16556 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.589071   0.008944   65.86   <2e-16 ***
## groupCTL    -0.004178   0.013484   -0.31    0.758    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.05797 on 73 degrees of freedom
## Multiple R-squared:  0.001314,   Adjusted R-squared:  -0.01237 
## F-statistic: 0.09602 on 1 and 73 DF,  p-value: 0.7575
summary(lm(mcc_Glial ~ group, data = ctp_pheno))
## 
## Call:
## lm(formula = mcc_Glial ~ group, data = ctp_pheno)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.31413 -0.09195 -0.03328  0.07573  0.39546 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.52485    0.02257  23.258   <2e-16 ***
## groupCTL     0.02693    0.03402   0.792    0.431    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1462 on 73 degrees of freedom
## Multiple R-squared:  0.008513,   Adjusted R-squared:  -0.005069 
## F-statistic: 0.6268 on 1 and 73 DF,  p-value: 0.4311
summary(lm(h_NeuN_neg ~ group, data = ctp_pheno))
## 
## Call:
## lm(formula = h_NeuN_neg ~ group, data = ctp_pheno)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.12954 -0.05750 -0.01548  0.03517  0.29099 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.65759    0.01341  49.044   <2e-16 ***
## groupCTL    -0.01854    0.02021  -0.917    0.362    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08689 on 73 degrees of freedom
## Multiple R-squared:  0.01139,    Adjusted R-squared:  -0.002155 
## F-statistic: 0.8409 on 1 and 73 DF,  p-value: 0.3622
summary(lm(hseq_Glial ~ group + age + sex + Batch + PMI, data = ctp_pheno))
## 
## Call:
## lm(formula = hseq_Glial ~ group + age + sex + Batch + PMI, data = ctp_pheno)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.07288 -0.04119 -0.01605  0.02483  0.17014 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.5631836  0.0211710  26.602   <2e-16 ***
## groupCTL    -0.0079633  0.0163444  -0.487    0.628    
## age          0.0004672  0.0004035   1.158    0.251    
## sexM         0.0161683  0.0167466   0.965    0.338    
## Batch2       0.0294078  0.0242135   1.215    0.229    
## PMI         -0.0001773  0.0004388  -0.404    0.687    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.05792 on 69 degrees of freedom
## Multiple R-squared:  0.05767,    Adjusted R-squared:  -0.01062 
## F-statistic: 0.8445 on 5 and 69 DF,  p-value: 0.5229
summary(lm(mcc_Glial ~ group + age + sex + Batch + PMI, data = ctp_pheno))
## 
## Call:
## lm(formula = mcc_Glial ~ group + age + sex + Batch + PMI, data = ctp_pheno)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.26423 -0.09819 -0.02626  0.07458  0.40709 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.4634743  0.0529274   8.757 8.28e-13 ***
## groupCTL    -0.0213009  0.0408609  -0.521   0.6038    
## age          0.0022741  0.0010087   2.255   0.0273 *  
## sexM         0.0081511  0.0418663   0.195   0.8462    
## Batch2      -0.0149036  0.0605337  -0.246   0.8063    
## PMI         -0.0001638  0.0010970  -0.149   0.8817    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1448 on 69 degrees of freedom
## Multiple R-squared:  0.08143,    Adjusted R-squared:  0.01487 
## F-statistic: 1.223 on 5 and 69 DF,  p-value: 0.3075
summary(lm(h_NeuN_neg ~ group + age + sex + Batch + PMI, data = ctp_pheno))
## 
## Call:
## lm(formula = h_NeuN_neg ~ group + age + sex + Batch + PMI, data = ctp_pheno)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.11542 -0.05745 -0.01549  0.04996  0.28980 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.6248907  0.0317703  19.669   <2e-16 ***
## groupCTL    -0.0146341  0.0245272  -0.597    0.553    
## age          0.0002027  0.0006055   0.335    0.739    
## sexM         0.0391659  0.0251307   1.558    0.124    
## Batch2       0.0365659  0.0363361   1.006    0.318    
## PMI         -0.0003936  0.0006585  -0.598    0.552    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08691 on 69 degrees of freedom
## Multiple R-squared:  0.06519,    Adjusted R-squared:  -0.00255 
## F-statistic: 0.9624 on 5 and 69 DF,  p-value: 0.447

7.2 Adjust for oligodendrocyte proportion

if (cellnum == 7) {

  summary(lm(cbind(hseq_Exc, hseq_Inh, hseq_Astro, hseq_Endo, hseq_Micro, hseq_OPC) ~ group, data = hseq_ctp_pheno_adjoligo.tmp))
  summary(lm(cbind(hseq_Exc, hseq_Inh, hseq_Astro, hseq_Endo, hseq_Micro, hseq_OPC) ~ group + age + sex + Batch + PMI, data = hseq_ctp_pheno_adjoligo.tmp))

} else if (cellnum == 9) {
  
  summary(lm(cbind(Exc_upper, Exc_lower, Inh_MGE, Inh_CGE, NonN_Astro_FGF3R, NonN_Endo, NonN_Micro, NonN_OPC) ~ group, data = methylcc_ctp_pheno_adjoligo.tmp))
  summary(lm(cbind(Exc_upper, Exc_lower, Inh_MGE, Inh_CGE, NonN_Astro_FGF3R, NonN_Endo, NonN_Micro, NonN_OPC) ~ group + age + sex + Batch + PMI, data = methylcc_ctp_pheno_adjoligo.tmp))

}
## Response hseq_Exc :
## 
## Call:
## lm(formula = hseq_Exc ~ group + age + sex + Batch + PMI, data = hseq_ctp_pheno_adjoligo.tmp)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.081460 -0.018449 -0.000297  0.022907  0.059760 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.2980234  0.0117820  25.295  < 2e-16 ***
## groupCTL     0.0216534  0.0090959   2.381  0.02005 *  
## age          0.0002093  0.0002245   0.932  0.35449    
## sexM        -0.0254833  0.0093197  -2.734  0.00794 ** 
## Batch2      -0.0210252  0.0134752  -1.560  0.12327    
## PMI          0.0001785  0.0002442   0.731  0.46726    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03223 on 69 degrees of freedom
## Multiple R-squared:  0.2925, Adjusted R-squared:  0.2412 
## F-statistic: 5.704 on 5 and 69 DF,  p-value: 0.0001842
## 
## 
## Response hseq_Inh :
## 
## Call:
## lm(formula = hseq_Inh ~ group + age + sex + Batch + PMI, data = hseq_ctp_pheno_adjoligo.tmp)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.046835 -0.023502 -0.009785  0.011028  0.133332 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.785e-01  1.323e-02  21.043   <2e-16 ***
## groupCTL    -1.266e-02  1.022e-02  -1.239   0.2195    
## age         -1.565e-05  2.522e-04  -0.062   0.9507    
## sexM         2.699e-02  1.047e-02   2.578   0.0121 *  
## Batch2       1.663e-02  1.514e-02   1.098   0.2758    
## PMI         -2.594e-04  2.743e-04  -0.946   0.3475    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0362 on 69 degrees of freedom
## Multiple R-squared:  0.1604, Adjusted R-squared:  0.09961 
## F-statistic: 2.637 on 5 and 69 DF,  p-value: 0.03062
## 
## 
## Response hseq_Astro :
## 
## Call:
## lm(formula = hseq_Astro ~ group + age + sex + Batch + PMI, data = hseq_ctp_pheno_adjoligo.tmp)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.072681 -0.016376  0.001653  0.019967  0.047397 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.383e-01  9.692e-03  14.267   <2e-16 ***
## groupCTL     6.892e-04  7.482e-03   0.092    0.927    
## age          5.882e-05  1.847e-04   0.318    0.751    
## sexM        -2.909e-03  7.666e-03  -0.379    0.705    
## Batch2      -5.513e-04  1.108e-02  -0.050    0.960    
## PMI          6.450e-05  2.009e-04   0.321    0.749    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02651 on 69 degrees of freedom
## Multiple R-squared:  0.008457,   Adjusted R-squared:  -0.06339 
## F-statistic: 0.1177 on 5 and 69 DF,  p-value: 0.9881
## 
## 
## Response hseq_Endo :
## 
## Call:
## lm(formula = hseq_Endo ~ group + age + sex + Batch + PMI, data = hseq_ctp_pheno_adjoligo.tmp)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0104275 -0.0047421 -0.0006643  0.0037760  0.0155846 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  6.588e-02  2.328e-03  28.295   <2e-16 ***
## groupCTL    -1.155e-03  1.798e-03  -0.642   0.5228    
## age         -9.535e-05  4.438e-05  -2.149   0.0352 *  
## sexM         9.749e-04  1.842e-03   0.529   0.5983    
## Batch2       1.340e-03  2.663e-03   0.503   0.6163    
## PMI          3.533e-06  4.826e-05   0.073   0.9418    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.00637 on 69 degrees of freedom
## Multiple R-squared:  0.1321, Adjusted R-squared:  0.06923 
## F-statistic: 2.101 on 5 and 69 DF,  p-value: 0.07565
## 
## 
## Response hseq_Micro :
## 
## Call:
## lm(formula = hseq_Micro ~ group + age + sex + Batch + PMI, data = hseq_ctp_pheno_adjoligo.tmp)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0164782 -0.0050154 -0.0007162  0.0038776  0.0185001 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  7.508e-02  2.494e-03  30.108  < 2e-16 ***
## groupCTL    -4.345e-03  1.925e-03  -2.257   0.0272 *  
## age         -2.340e-04  4.752e-05  -4.924 5.56e-06 ***
## sexM         3.110e-03  1.972e-03   1.577   0.1194    
## Batch2       2.786e-03  2.852e-03   0.977   0.3321    
## PMI         -3.009e-05  5.168e-05  -0.582   0.5623    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.006821 on 69 degrees of freedom
## Multiple R-squared:  0.504,  Adjusted R-squared:  0.4681 
## F-statistic: 14.02 on 5 and 69 DF,  p-value: 1.867e-09
## 
## 
## Response hseq_OPC :
## 
## Call:
## lm(formula = hseq_OPC ~ group + age + sex + Batch + PMI, data = hseq_ctp_pheno_adjoligo.tmp)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0076799 -0.0018509 -0.0002709  0.0017330  0.0165042 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4.885e-02  1.247e-03  39.175   <2e-16 ***
## groupCTL    -2.245e-03  9.626e-04  -2.333   0.0226 *  
## age          3.477e-06  2.376e-05   0.146   0.8841    
## sexM         1.186e-03  9.863e-04   1.202   0.2333    
## Batch2       1.896e-03  1.426e-03   1.330   0.1880    
## PMI          5.877e-06  2.584e-05   0.227   0.8208    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.003411 on 69 degrees of freedom
## Multiple R-squared:  0.1637, Adjusted R-squared:  0.1031 
## F-statistic: 2.701 on 5 and 69 DF,  p-value: 0.02747

8 Model: per-cell-type, compare clr-transformed

if (cellnum == 7) {

  summary(lm(cbind(hseq_Exc, hseq_Inh, hseq_Astro, hseq_Endo, hseq_Micro, hseq_Oligo, hseq_OPC) ~ group, data = hseq_ctp_pheno.clr.1e3))
  summary(lm(cbind(hseq_Exc, hseq_Inh, hseq_Astro, hseq_Endo, hseq_Micro, hseq_Oligo, hseq_OPC) ~ group + age + sex + Batch + PMI, data = hseq_ctp_pheno.clr.1e3))

} else if (cellnum == 9) {
  
  summary(lm(cbind(Exc_upper, Exc_lower, Inh_MGE, Inh_CGE, NonN_Astro_FGF3R, NonN_Endo, NonN_Micro, NonN_Oligo_MBP, NonN_OPC) ~ group, data = methylcc_ctp_pheno.clr.1e3))
  summary(lm(cbind(Exc_upper, Exc_lower, Inh_MGE, Inh_CGE, NonN_Astro_FGF3R, NonN_Endo, NonN_Micro, NonN_Oligo_MBP, NonN_OPC) ~ group + age + sex + Batch + PMI, data = methylcc_ctp_pheno.clr.1e3))

}
## Response hseq_Exc :
## 
## Call:
## lm(formula = hseq_Exc ~ group + age + sex + Batch + PMI, data = hseq_ctp_pheno.clr.1e3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.80899 -0.09030  0.07274  0.14876  0.32464 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.4467903  0.0817291   5.467 6.85e-07 ***
## groupCTL     0.1030609  0.0630962   1.633   0.1069    
## age         -0.0006471  0.0015576  -0.415   0.6791    
## sexM        -0.1361644  0.0646488  -2.106   0.0388 *  
## Batch2      -0.1601954  0.0934744  -1.714   0.0911 .  
## PMI          0.0011764  0.0016939   0.695   0.4897    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2236 on 69 degrees of freedom
## Multiple R-squared:  0.1669, Adjusted R-squared:  0.1065 
## F-statistic: 2.764 on 5 and 69 DF,  p-value: 0.0247
## 
## 
## Response hseq_Inh :
## 
## Call:
## lm(formula = hseq_Inh ~ group + age + sex + Batch + PMI, data = hseq_ctp_pheno.clr.1e3)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.240234 -0.086430  0.009333  0.093671  0.237931 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.3778372  0.0437850   8.629 1.41e-12 ***
## groupCTL    -0.0123064  0.0338028  -0.364   0.7169    
## age         -0.0015065  0.0008345  -1.805   0.0754 .  
## sexM         0.0460320  0.0346345   1.329   0.1882    
## Batch2      -0.0321598  0.0500774  -0.642   0.5229    
## PMI         -0.0002278  0.0009075  -0.251   0.8025    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1198 on 69 degrees of freedom
## Multiple R-squared:  0.1111, Adjusted R-squared:  0.04668 
## F-statistic: 1.725 on 5 and 69 DF,  p-value: 0.1405
## 
## 
## Response hseq_Astro :
## 
## Call:
## lm(formula = hseq_Astro ~ group + age + sex + Batch + PMI, data = hseq_ctp_pheno.clr.1e3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.51866 -0.09244  0.02004  0.12482  0.24261 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.2043229  0.0586411   3.484 0.000862 ***
## groupCTL     0.0153120  0.0452719   0.338 0.736222    
## age          0.0008999  0.0011176   0.805 0.423445    
## sexM        -0.0262236  0.0463859  -0.565 0.573679    
## Batch2      -0.0010085  0.0670684  -0.015 0.988047    
## PMI          0.0006386  0.0012154   0.525 0.600966    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1604 on 69 degrees of freedom
## Multiple R-squared:  0.03694,    Adjusted R-squared:  -0.03285 
## F-statistic: 0.5293 on 5 and 69 DF,  p-value: 0.7533
## 
## 
## Response hseq_Endo :
## 
## Call:
## lm(formula = hseq_Endo ~ group + age + sex + Batch + PMI, data = hseq_ctp_pheno.clr.1e3)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.108795 -0.042661 -0.000779  0.030240  0.221682 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.5220514  0.0237946 -21.940   <2e-16 ***
## groupCTL    -0.0103468  0.0183698  -0.563    0.575    
## age         -0.0006825  0.0004535  -1.505    0.137    
## sexM         0.0108907  0.0188219   0.579    0.565    
## Batch2       0.0292318  0.0272141   1.074    0.287    
## PMI         -0.0002393  0.0004932  -0.485    0.629    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.06509 on 69 degrees of freedom
## Multiple R-squared:  0.1085, Adjusted R-squared:  0.0439 
## F-statistic:  1.68 on 5 and 69 DF,  p-value: 0.1511
## 
## 
## Response hseq_Micro :
## 
## Call:
## lm(formula = hseq_Micro ~ group + age + sex + Batch + PMI, data = hseq_ctp_pheno.clr.1e3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.23722 -0.05557 -0.01264  0.06681  0.21546 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.3936803  0.0319911 -12.306  < 2e-16 ***
## groupCTL    -0.0608927  0.0246977  -2.466   0.0162 *  
## age         -0.0025238  0.0006097  -4.139 9.69e-05 ***
## sexM         0.0434383  0.0253054   1.717   0.0905 .  
## Batch2       0.0424489  0.0365886   1.160   0.2500    
## PMI         -0.0007393  0.0006630  -1.115   0.2687    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08752 on 69 degrees of freedom
## Multiple R-squared:  0.4801, Adjusted R-squared:  0.4425 
## F-statistic: 12.75 on 5 and 69 DF,  p-value: 8.833e-09
## 
## 
## Response hseq_Oligo :
## 
## Call:
## lm(formula = hseq_Oligo ~ group + age + sex + Batch + PMI, data = hseq_ctp_pheno.clr.1e3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.46452 -0.27187 -0.06668  0.20354  0.88888 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.7064602  0.1267642   5.573 4.51e-07 ***
## groupCTL     0.0010616  0.0978641   0.011    0.991    
## age          0.0036234  0.0024159   1.500    0.138    
## sexM         0.0419818  0.1002722   0.419    0.677    
## Batch2       0.0707946  0.1449815   0.488    0.627    
## PMI         -0.0004685  0.0026273  -0.178    0.859    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3468 on 69 degrees of freedom
## Multiple R-squared:  0.0488, Adjusted R-squared:  -0.02013 
## F-statistic: 0.7079 on 5 and 69 DF,  p-value: 0.6195
## 
## 
## Response hseq_OPC :
## 
## Call:
## lm(formula = hseq_OPC ~ group + age + sex + Batch + PMI, data = hseq_ctp_pheno.clr.1e3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.18558 -0.05405 -0.01524  0.03236  0.29593 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.8196789  0.0319848 -25.627   <2e-16 ***
## groupCTL    -0.0358886  0.0246928  -1.453    0.151    
## age          0.0008365  0.0006096   1.372    0.174    
## sexM         0.0200452  0.0253004   0.792    0.431    
## Batch2       0.0508883  0.0365814   1.391    0.169    
## PMI         -0.0001400  0.0006629  -0.211    0.833    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0875 on 69 degrees of freedom
## Multiple R-squared:  0.09166,    Adjusted R-squared:  0.02584 
## F-statistic: 1.393 on 5 and 69 DF,  p-value: 0.2379

8.1 Adjust for oligodendrocyte proportion

if (cellnum == 7) {

  summary(lm(cbind(hseq_Exc, hseq_Inh, hseq_Astro, hseq_Endo, hseq_Micro, hseq_OPC) ~ group, data = hseq_ctp_pheno_adjoligo.clr.1e3))
  summary(lm(cbind(hseq_Exc, hseq_Inh, hseq_Astro, hseq_Endo, hseq_Micro, hseq_OPC) ~ group + age + sex + Batch + PMI, data = hseq_ctp_pheno_adjoligo.clr.1e3))

} else if (cellnum == 9) {
  
  summary(lm(cbind(Exc_upper, Exc_lower, Inh_MGE, Inh_CGE, NonN_Astro_FGF3R, NonN_Endo, NonN_Micro, NonN_OPC) ~ group, data = methylcc_ctp_pheno_adjoligo.clr.1e3))
  summary(lm(cbind(Exc_upper, Exc_lower, Inh_MGE, Inh_CGE, NonN_Astro_FGF3R, NonN_Endo, NonN_Micro, NonN_OPC) ~ group + age + sex + Batch + PMI, data = methylcc_ctp_pheno_adjoligo.clr.1e3))

}
## Response hseq_Exc :
## 
## Call:
## lm(formula = hseq_Exc ~ group + age + sex + Batch + PMI, data = hseq_ctp_pheno_adjoligo.clr.1e3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.38230 -0.06098  0.00562  0.09455  0.26104 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.9217721  0.0510332  18.062   <2e-16 ***
## groupCTL     0.0875862  0.0393985   2.223   0.0295 *  
## age          0.0014563  0.0009726   1.497   0.1389    
## sexM        -0.0990073  0.0403680  -2.453   0.0167 *  
## Batch2      -0.0866855  0.0583673  -1.485   0.1421    
## PMI          0.0005161  0.0010577   0.488   0.6271    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1396 on 69 degrees of freedom
## Multiple R-squared:  0.2941, Adjusted R-squared:  0.243 
## F-statistic:  5.75 on 5 and 69 DF,  p-value: 0.0001711
## 
## 
## Response hseq_Inh :
## 
## Call:
## lm(formula = hseq_Inh ~ group + age + sex + Batch + PMI, data = hseq_ctp_pheno_adjoligo.clr.1e3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.18190 -0.08986 -0.03313  0.05097  0.45388 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.8528190  0.0494782  17.236   <2e-16 ***
## groupCTL    -0.0277810  0.0381980  -0.727   0.4695    
## age          0.0005969  0.0009430   0.633   0.5288    
## sexM         0.0831891  0.0391379   2.126   0.0371 *  
## Batch2       0.0413502  0.0565888   0.731   0.4674    
## PMI         -0.0008881  0.0010255  -0.866   0.3895    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1354 on 69 degrees of freedom
## Multiple R-squared:  0.09042,    Adjusted R-squared:  0.02451 
## F-statistic: 1.372 on 5 and 69 DF,  p-value: 0.2456
## 
## 
## Response hseq_Astro :
## 
## Call:
## lm(formula = hseq_Astro ~ group + age + sex + Batch + PMI, data = hseq_ctp_pheno_adjoligo.clr.1e3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.59713 -0.09881  0.03542  0.13927  0.25091 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  0.1434470  0.0659359   2.176    0.033 *
## groupCTL     0.0233147  0.0509037   0.458    0.648  
## age          0.0007541  0.0012566   0.600    0.550  
## sexM        -0.0343067  0.0521562  -0.658    0.513  
## Batch2      -0.0200648  0.0754116  -0.266    0.791  
## PMI          0.0008516  0.0013666   0.623    0.535  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1804 on 69 degrees of freedom
## Multiple R-squared:  0.03848,    Adjusted R-squared:  -0.03119 
## F-statistic: 0.5523 on 5 and 69 DF,  p-value: 0.736
## 
## 
## Response hseq_Endo :
## 
## Call:
## lm(formula = hseq_Endo ~ group + age + sex + Batch + PMI, data = hseq_ctp_pheno_adjoligo.clr.1e3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.13055 -0.05533  0.00424  0.03807  0.16434 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -5.829e-01  2.415e-02 -24.134   <2e-16 ***
## groupCTL    -2.344e-03  1.865e-02  -0.126   0.9003    
## age         -8.283e-04  4.603e-04  -1.799   0.0763 .  
## sexM         2.808e-03  1.911e-02   0.147   0.8836    
## Batch2       1.018e-02  2.763e-02   0.368   0.7137    
## PMI         -2.633e-05  5.006e-04  -0.053   0.9582    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.06608 on 69 degrees of freedom
## Multiple R-squared:  0.07529,    Adjusted R-squared:  0.008279 
## F-statistic: 1.124 on 5 and 69 DF,  p-value: 0.3561
## 
## 
## Response hseq_Micro :
## 
## Call:
## lm(formula = hseq_Micro ~ group + age + sex + Batch + PMI, data = hseq_ctp_pheno_adjoligo.clr.1e3)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.245880 -0.048520 -0.004683  0.045564  0.186385 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.4545562  0.0281792 -16.131  < 2e-16 ***
## groupCTL    -0.0528899  0.0217549  -2.431   0.0177 *  
## age         -0.0026696  0.0005370  -4.971 4.66e-06 ***
## sexM         0.0353552  0.0222902   1.586   0.1173    
## Batch2       0.0233926  0.0322289   0.726   0.4704    
## PMI         -0.0005263  0.0005840  -0.901   0.3706    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.07709 on 69 degrees of freedom
## Multiple R-squared:  0.5197, Adjusted R-squared:  0.4849 
## F-statistic: 14.93 on 5 and 69 DF,  p-value: 6.423e-10
## 
## 
## Response hseq_OPC :
## 
## Call:
## lm(formula = hseq_OPC ~ group + age + sex + Batch + PMI, data = hseq_ctp_pheno_adjoligo.clr.1e3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.15775 -0.03805 -0.01182  0.03293  0.22246 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -8.806e-01  2.336e-02 -37.698   <2e-16 ***
## groupCTL    -2.789e-02  1.803e-02  -1.546    0.127    
## age          6.906e-04  4.452e-04   1.551    0.125    
## sexM         1.196e-02  1.848e-02   0.647    0.520    
## Batch2       3.183e-02  2.672e-02   1.192    0.238    
## PMI          7.298e-05  4.841e-04   0.151    0.881    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0639 on 69 degrees of freedom
## Multiple R-squared:  0.09188,    Adjusted R-squared:  0.02607 
## F-statistic: 1.396 on 5 and 69 DF,  p-value: 0.2366

9 Model: per-cell-type, compare ratios between other CTPs and Inh

  • dealing with this directly as a ratio, so no need to do transform (what an alr would do anyway)
hseq_ctp_pheno.tmp.clr0_ratio <- data.frame(hseq_ctp_pheno.tmp.clr0)
hseq_ctp_pheno.tmp.clr0_ratio$exc_inh <- hseq_ctp_pheno.tmp.clr0_ratio$hseq_Exc/hseq_ctp_pheno.tmp.clr0_ratio$hseq_Inh

hseq_ctp_pheno.tmp.clr0_ratio.df <- data.frame(keep_pheno_col, hseq_ctp_pheno.tmp.clr0_ratio)

summary(lm(exc_inh ~ group, data = hseq_ctp_pheno.tmp.clr0_ratio.df))
## 
## Call:
## lm(formula = exc_inh ~ group, data = hseq_ctp_pheno.tmp.clr0_ratio.df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.46423 -0.12028  0.01837  0.13599  0.49764 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.98616    0.03121  31.602  < 2e-16 ***
## groupCTL     0.16022    0.04704   3.406  0.00108 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2022 on 73 degrees of freedom
## Multiple R-squared:  0.1371, Adjusted R-squared:  0.1253 
## F-statistic:  11.6 on 1 and 73 DF,  p-value: 0.001076
summary(lm(exc_inh ~ group + age + sex + Batch + BrainBank, data = hseq_ctp_pheno.tmp.clr0_ratio.df))
## 
## Call:
## lm(formula = exc_inh ~ group + age + sex + Batch + BrainBank, 
##     data = hseq_ctp_pheno.tmp.clr0_ratio.df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.44736 -0.10079  0.01096  0.14471  0.28398 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      1.124895   0.063984  17.581  < 2e-16 ***
## groupCTL         0.119075   0.053707   2.217  0.02996 *  
## age              0.001529   0.001446   1.058  0.29397    
## sexM            -0.186189   0.056096  -3.319  0.00145 ** 
## Batch2          -0.102883   0.079014  -1.302  0.19728    
## BrainBankNICHD  -0.047068   0.059584  -0.790  0.43231    
## BrainBankOxford -0.035315   0.067053  -0.527  0.60013    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1879 on 68 degrees of freedom
## Multiple R-squared:  0.3062, Adjusted R-squared:  0.245 
## F-statistic: 5.003 on 6 and 68 DF,  p-value: 0.0002682

10 Model: ALDEx2 for differential proportion

  • Documentation: https://www.bioconductor.org/packages/release/bioc/vignettes/ALDEx2/inst/doc/ALDEx2_vignette.pdf
  • Uses Bayesian sampling from a Dirichlet distribution to estimate underlying technical variation
  • Input count data, takes as given (requires integers, so rounded the back-derived count data)
  • –> infers technical variation (eg. sequencing the same sample again) multiple times using aldex.clr()
    • aldex.clr(): recommend mc.samples >128 for t-test, >1000 for rigorous effect size calculations, >16 for ANOVA
  • Outputs (see p10 of documentation):
    • Tables
      • we*: Welch’s t-test
      • wi*: Wilcoxon rank test
      • *ep: expected p-value
      • *eBH: expected BH-correction
      • rab*: median clr value for a given group
      • dif.btw: median difference in clr value between groups
      • dif.win: median of largest difference in clr values within groups
      • effect: median effect size (diff.btw/max(diff.win))
      • overlap: prop of effect size that overlaps 0 (ie. no effect)
    • Plots
      • relationships between Abundance (clr) vs Difference + Dispersion vs Difference
      • red denotes differentially abundant with q < 0.1, grey abundant but not differentially-abundant, black are rare but not differentially abundant)

10.1 houseman_seq

10.1.1 No covariates

conds <- hseq_ctp_pheno.clr.1e3$group

aldex_in <- sapply(data.frame(t(hseq_ctp_pheno.tmp[,c(all_of(level2_cols))])*10000), as.integer)
rownames(aldex_in) <- level2_cols
colnames(aldex_in) <- hseq_ctp_pheno.tmp$IID

x <- aldex.clr(aldex_in, conds, mc.samples=128, denom="all", verbose=F)
## operating in serial mode
## computing center with all features
x.tt <- aldex.ttest(x, paired.test = F)

x.effect <- aldex.effect(x, verbose = F)

x.all <- data.frame(x.tt, x.effect)

tmp <- rownames(x.all)
x.all <- as.data.frame(sapply(x.all, function(x) signif(x, 2)))
rownames(x.all) <- tmp

datatable(x.all)
par(mfrow=c(1,2))
aldex.plot(x.all, type="MA", test="welch", xlab="Log-ratio abundance",ylab="Difference")
aldex.plot(x.all, type="MW", test="welch", xlab="Dispersion",ylab="Difference")

10.1.2 glm + covariates

  • Runs a glm
  • Input model matrix
conds <- hseq_ctp_pheno.tmp$group
cov.aldex <- methylcc_ctp_pheno.tmp %>% dplyr::select(c("group", "age", "sex", "Batch", "BrainBank"))
mm <- model.matrix(~., cov.aldex)

x <- aldex.clr(aldex_in, mm, mc.samples=128, denom="all", verbose=F)

x.glm <- aldex.glm(x, mm)
## Warning in if (verbose) message("running tests for each MC instance:"): the
## condition has length > 1 and only the first element will be used
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## |--
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -(25%)
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -(50%)
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -(75%)
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -|
x.all2 <- data.frame(x.glm)

tmp <- rownames(x.all2)
x.all2 <- as.data.frame(sapply(x.all2, function(x) signif(x, 2)))
rownames(x.all2) <- tmp

datatable(x.all2)

11 Model: ANCOM for differential proportion

11.1 methylCC

11.1.1 Prep

# Prepare data
metadata <- hseq_ctp_pheno.tmp %>% dplyr::select(c("IID", "group", "age", "sex", "Batch", "BrainBank"))
rownames(metadata) <- metadata$IID
metadata$group <- as.factor(metadata$group)

ancom_features <- t(hseq_ctp_pheno.tmp[,c(level2_cols)])*10000
colnames(ancom_features) <- hseq_ctp_pheno.tmp$IID
ancom_features <- ancom_features[,which(colnames(ancom_features) %in% metadata$IID)]

# Check variables in the correct order
identical(colnames(ancom_features), metadata$IID)
## [1] TRUE
ancom_table <- feature_table_pre_process(ancom_features, metadata, "IID", group_var = NULL, out_cut = 0.05, zero_cut = 0.90, lib_cut = 10, neg_lb)

11.1.2 Case-control analysis with no covariates

# Run tests with covariates (age + sex + diet)
ancom_out_nocov <- ANCOM(ancom_table$feature_table, ancom_table$meta_data, ancom_table$structure_zeros, main_var = "group", p_adj_method = "BH")

ancom_out_nocov$out <- ancom_out_nocov$out[order(ancom_out_nocov$out$W, decreasing = T),]

datatable(ancom_out_nocov$out)
ancom_out_nocov$fig

11.1.3 Case-control analysis with covariates

# Run tests with covariates (age + sex + diet)
ancom_out_cov <- ANCOM(ancom_table$feature_table, ancom_table$meta_data, ancom_table$structure_zeros, main_var = "group", p_adj_method = "BH", adj_formula = "age + sex + Batch + BrainBank")

ancom_out_cov$out <- ancom_out_cov$out[order(ancom_out_cov$out$W, decreasing = T),]

datatable(ancom_out_cov$out)
ancom_out_cov$fig

12 Correlation plots

cov_var <- c("IID", "group", "age", "sex", "Batch", "BrainBank")

12.1 Glial

12.1.1 Aggregated

  • eBayes + Houseman (coefs_brain): h_NeuN_neg
  • methylCC (summed): Glial
  • smartSVA: sSV1
  • eBayes + Houseman (summed coefs_sub): h_Glial
    • include methylCC oligo since it seems to go together …
neunn <- ctp_pheno %>% dplyr::select(c(all_of(cov_var), "h_NeuN_neg", "hseq_Glial", "mcc_Glial", "sSV1", "sSV2", "h_Glial", "hseq_Oligo", "hseq_OPC", "mcc_Oligo", "mcc_OPC", "h_OLIGO", "h_OPC", "celfie_Glial", "celfie_Oligo", "celfie_OPC", "VAEe3", "VAEe9"))

# y = h_NeuN_neg
neunn.long <- melt(neunn, id.var = c(all_of(cov_var), "h_NeuN_neg"), measure.var = c("hseq_Glial", "mcc_Glial", "h_Glial", "celfie_Glial", "sSV1", "VAEe3"), variable.name = c("method"), value.name = "CTP")
ggplot(neunn.long, aes(x=CTP, y=h_NeuN_neg)) + geom_point(aes(colour = age)) + scale_colour_viridis(discrete = FALSE) + geom_smooth(method = "lm") + facet_wrap(~method, scales = "free_x")
## `geom_smooth()` using formula 'y ~ x'

# y: hseq_Glial
neunn.long <- melt(neunn, id.var = c(all_of(cov_var), "hseq_Glial"), measure.var = c("h_NeuN_neg", "mcc_Glial", "h_Glial", "celfie_Glial", "sSV1", "VAEe3"), variable.name = c("method"), value.name = "CTP")
ggplot(neunn.long, aes(x=CTP, y=hseq_Glial)) + geom_point(aes(colour = age)) + scale_colour_viridis(discrete = FALSE) + geom_smooth(method = "lm") + facet_wrap(~method, scales = "free_x")
## `geom_smooth()` using formula 'y ~ x'

# y: mcc_Glial
neunn.long <- melt(neunn, id.var = c(all_of(cov_var), "mcc_Glial"), measure.var = c("h_NeuN_neg", "hseq_Glial", "h_Glial", "celfie_Glial", "sSV1", "VAEe3"), variable.name = c("method"), value.name = "CTP")
ggplot(neunn.long, aes(x=CTP, y=mcc_Glial)) + geom_point(aes(colour = age)) + scale_colour_viridis(discrete = FALSE) + geom_smooth(method = "lm") + facet_wrap(~method, scales = "free_x")
## `geom_smooth()` using formula 'y ~ x'

# y: h_Glial
neunn.long <- melt(neunn, id.var = c(all_of(cov_var), "h_Glial"), measure.var = c("h_NeuN_neg", "hseq_Glial", "mcc_Glial", "celfie_Glial", "sSV1", "VAEe3"), variable.name = c("method"), value.name = "CTP")
ggplot(neunn.long, aes(x=CTP, y=h_Glial)) + geom_point(aes(colour = age)) + scale_colour_viridis(discrete = FALSE) + geom_smooth(method = "lm") + facet_wrap(~method, scales = "free_x")
## `geom_smooth()` using formula 'y ~ x'

# y: celfie_Glial
neunn.long <- melt(neunn, id.var = c(all_of(cov_var), "celfie_Glial"), measure.var = c("h_NeuN_neg", "hseq_Glial", "mcc_Glial", "h_Glial", "sSV1", "VAEe3"), variable.name = c("method"), value.name = "CTP")
ggplot(neunn.long, aes(x=CTP, y=celfie_Glial)) + geom_point(aes(colour = age)) + scale_colour_viridis(discrete = FALSE) + geom_smooth(method = "lm") + facet_wrap(~method, scales = "free_x")
## `geom_smooth()` using formula 'y ~ x'

# y: sSV1
neunn.long <- melt(neunn, id.var = c(all_of(cov_var), "sSV1"), measure.var = c("h_NeuN_neg", "hseq_Oligo", "mcc_Oligo", "h_OLIGO", "celfie_Oligo", "VAEe9"), variable.name = c("method"), value.name = "CTP")
ggplot(neunn.long, aes(x=CTP, y=sSV1)) + geom_point(aes(colour = age)) + scale_colour_viridis(discrete = FALSE) + geom_smooth(method = "lm") + facet_wrap(~method, scales = "free_x")
## `geom_smooth()` using formula 'y ~ x'

# y: sSV1
neunn.long <- melt(neunn, id.var = c(all_of(cov_var), "sSV1"), measure.var = c("h_NeuN_neg", "hseq_Glial", "mcc_Glial", "h_Glial", "celfie_Glial", "hseq_Oligo", "hseq_OPC", "mcc_Oligo", "mcc_OPC", "h_OLIGO", "h_OPC", "celfie_Oligo", "celfie_OPC", "VAEe9"), variable.name = c("method"), value.name = "CTP")
ggplot(neunn.long, aes(x=CTP, y=sSV1)) + geom_point(aes(colour = age)) + scale_colour_viridis(discrete = FALSE) + geom_smooth(method = "lm") + facet_wrap(~method, scales = "free_x")
## `geom_smooth()` using formula 'y ~ x'

# y: VAEe9
neunn.long <- melt(neunn, id.var = c(all_of(cov_var), "VAEe9"), measure.var = c("h_NeuN_neg", "hseq_Oligo", "mcc_Oligo", "h_OLIGO", "celfie_Oligo", "sSV1"), variable.name = c("method"), value.name = "CTP")
ggplot(neunn.long, aes(x=CTP, y=VAEe9)) + geom_point(aes(colour = age)) + scale_colour_viridis(discrete = FALSE) + geom_smooth(method = "lm") + facet_wrap(~method, scales = "free_x")
## `geom_smooth()` using formula 'y ~ x'

12.1.2 Compare individual glial sub-type CTPs

# Comparison between houseman extremes vs eBayes
ctp_pheno_sub_glial <- data.frame(ctp_pheno[,c("hseq_Glial", "hseq_Astro", "hseq_Endo", "hseq_Micro", "hseq_Oligo", "hseq_OPC", "h_NeuN_neg", "h_Glial", "h_ASTRO", "h_ENDO", "h_MONO", "h_OLIGO", "h_OPC")])

ggpairs(ctp_pheno_sub_glial)

# Comparison between houseman extremes vs methylCC extremes
ctp_pheno_sub_glial <- data.frame(ctp_pheno[,c("hseq_Glial", "hseq_Astro", "hseq_Endo", "hseq_Micro", "hseq_Oligo", "hseq_OPC", "h_NeuN_neg", "h_Glial", "h_ASTRO", "h_ENDO", "h_MONO", "h_OLIGO", "h_OPC")])

ggpairs(ctp_pheno_sub_glial)

# Comparison between houseman extremes vs CelFIE
ctp_pheno_sub_glial <- data.frame(ctp_pheno[,c("hseq_Glial", "hseq_Astro", "hseq_Endo", "hseq_Micro", "hseq_Oligo", "hseq_OPC", "h_NeuN_neg", "celfie_Glial", "celfie_Astro", "celfie_Endo", "celfie_Micro", "celfie_Oligo", "celfie_OPC")])

ggpairs(ctp_pheno_sub_glial)

# Comparison between houseman array vs methylCC extremes
ctp_pheno_sub_glial <- data.frame(ctp_pheno[,c("h_Glial", "h_ASTRO", "h_ENDO", "h_MONO", "h_OLIGO", "h_OPC", "h_NeuN_neg", "mcc_Glial", "mcc_Astro", "mcc_Endo", "mcc_Micro", "mcc_Oligo", "mcc_OPC")])

ggpairs(ctp_pheno_sub_glial)

12.2 Neuronal

12.2.1 Aggregated

# Select columns
neunp <- ctp_pheno %>% dplyr::select(c(all_of(cov_var), "h_NeuN_pos", "hseq_Neuron", "mcc_Neuron", "h_Neuron", "celfie_Neuron", "sSV1", "VAEe3"))

# y: h_NeuN_pos
neunp.long <- melt(neunp, id.var = c(all_of(cov_var), "h_NeuN_pos"), measure.var = c("hseq_Neuron", "mcc_Neuron", "h_Neuron", "celfie_Neuron", "sSV1", "VAEe3"), variable.name = c("method"), value.name = "CTP")
ggplot(neunp.long, aes(x=CTP, y=h_NeuN_pos)) + geom_point(aes(colour = age)) + scale_colour_viridis(discrete = F) + geom_smooth(method = "lm") + facet_wrap(~method, scales = "free_x")
## `geom_smooth()` using formula 'y ~ x'

# y: hseq_Neuron
neunp.long <- melt(neunp, id.var = c(all_of(cov_var), "hseq_Neuron"), measure.var = c("h_NeuN_pos", "mcc_Neuron", "h_Neuron", "celfie_Neuron", "sSV1", "VAEe3"), variable.name = c("method"), value.name = "CTP")
ggplot(neunp.long, aes(x=CTP, y=hseq_Neuron)) + geom_point(aes(colour = age)) + scale_colour_viridis(discrete = F) + geom_smooth(method = "lm") + facet_wrap(~method, scales = "free_x")
## `geom_smooth()` using formula 'y ~ x'

# y: mcc_Neuron
neunp.long <- melt(neunp, id.var = c(all_of(cov_var), "mcc_Neuron"), measure.var = c("h_NeuN_pos", "hseq_Neuron", "h_Neuron", "celfie_Neuron", "sSV1", "VAEe3"), variable.name = c("method"), value.name = "CTP")
ggplot(neunp.long, aes(x=CTP, y=mcc_Neuron)) + geom_point(aes(colour = age)) + scale_colour_viridis(discrete = F) + geom_smooth(method = "lm") + facet_wrap(~method, scales = "free_x")
## `geom_smooth()` using formula 'y ~ x'

# y: h_Neuron
neunp.long <- melt(neunp, id.var = c(all_of(cov_var), "h_Neuron"), measure.var = c("h_NeuN_pos", "hseq_Neuron", "mcc_Neuron", "celfie_Neuron", "sSV1", "VAEe3"), variable.name = c("method"), value.name = "CTP")
ggplot(neunp.long, aes(x=CTP, y=h_Neuron)) + geom_point(aes(colour = age)) + scale_colour_viridis(discrete = F) + geom_smooth(method = "lm") + facet_wrap(~method, scales = "free_x")
## `geom_smooth()` using formula 'y ~ x'

# y: celfie_Neuron
neunp.long <- melt(neunp, id.var = c(all_of(cov_var), "celfie_Neuron"), measure.var = c("h_NeuN_pos", "hseq_Neuron", "mcc_Neuron", "h_Neuron", "sSV1", "VAEe3"), variable.name = c("method"), value.name = "CTP")
ggplot(neunp.long, aes(x=CTP, y=celfie_Neuron)) + geom_point(aes(colour = age)) + scale_colour_viridis(discrete = F) + geom_smooth(method = "lm") + facet_wrap(~method, scales = "free_x")
## `geom_smooth()` using formula 'y ~ x'

# y: sSV1
neunp.long <- melt(neunp, id.var = c(all_of(cov_var), "sSV1"), measure.var = c("h_NeuN_pos", "hseq_Neuron", "mcc_Neuron", "h_Neuron", "celfie_Neuron", "VAEe3"), variable.name = c("method"), value.name = "CTP")
ggplot(neunp.long, aes(x=CTP, y=sSV1)) + geom_point(aes(colour = age)) + scale_colour_viridis(discrete = F) + geom_smooth(method = "lm") + facet_wrap(~method, scales = "free_x")
## `geom_smooth()` using formula 'y ~ x'

# y: VAEe3
neunp.long <- melt(neunp, id.var = c(all_of(cov_var), "VAEe3"), measure.var = c("h_NeuN_pos", "hseq_Neuron", "mcc_Neuron", "h_Neuron", "celfie_Neuron", "sSV1"), variable.name = c("method"), value.name = "CTP")
ggplot(neunp.long, aes(x=CTP, y=VAEe3)) + geom_point(aes(colour = age)) + scale_colour_viridis(discrete = F) + geom_smooth(method = "lm") + facet_wrap(~method, scales = "free_x")
## `geom_smooth()` using formula 'y ~ x'

12.2.2 Compare individual neuronal sub-type CTPs

if (cellnum == 7) {
  ctp_pheno_sub_neuron <- data.frame(ctp_pheno[,c("hseq_Neuron", "hseq_Exc", "hseq_Inh", "h_Neuron", "h_GLU", "h_GABA", "mcc_Neuron", "mcc_Exc", "mcc_Inh", "h_NeuN_pos")])

} else if (cellnum == 9) {
  ctp_pheno_sub_neuron <- data.frame(ctp_pheno[,c("Neuron", "Exc_upper", "Exc_lower", "Inh_MGE", "Inh_CGE", "h_NeuN_pos", "h_Neuron", "h_GABA", "h_GLU")])
}

ggpairs(ctp_pheno_sub_neuron)

12.3 All together

ctp_pheno_sub <- data.frame(ctp_pheno[,c("hseq_Exc", "hseq_Inh", "hseq_Astro", "hseq_Endo", "hseq_Micro", "hseq_Oligo", "hseq_OPC", "h_GLU", "h_GABA", "h_ASTRO", "h_ENDO", "h_MONO", "h_OLIGO", "h_OPC")])

#svg(paste(out_dir, "/", filen, "_ctp_hseq_harray_cor_matrix.svg", sep = ""), height = 6, width = 6)
jpeg(paste(out_dir, "/", filen, "_ctp_hseq_harray_cor_matrix.jpeg", sep = ""), height = 600, width = 600, quality = 100)
g <- ggduo(ctp_pheno_sub, colnames(ctp_pheno_sub)[1:7], colnames(ctp_pheno_sub)[8:14], xlab = "Houseman + sequencing reference", ylab = "Houseman + array reference", title = "UCLA_ASD")
print(g)
## plot: [1,1] [>-------------------------------------------------] 2% est: 0s
## plot: [1,2] [=>------------------------------------------------] 4% est: 1s
## plot: [1,3] [==>-----------------------------------------------] 6% est: 1s
## plot: [1,4] [===>----------------------------------------------] 8% est: 1s
## plot: [1,5] [====>---------------------------------------------] 10% est: 1s
## plot: [1,6] [=====>--------------------------------------------] 12% est: 2s
## plot: [1,7] [======>-------------------------------------------] 14% est: 2s
## plot: [2,1] [=======>------------------------------------------] 16% est: 2s
## plot: [2,2] [========>-----------------------------------------] 18% est: 2s
## plot: [2,3] [=========>----------------------------------------] 20% est: 2s
## plot: [2,4] [==========>---------------------------------------] 22% est: 1s
## plot: [2,5] [===========>--------------------------------------] 24% est: 1s
## plot: [2,6] [============>-------------------------------------] 27% est: 1s
## plot: [2,7] [=============>------------------------------------] 29% est: 1s
## plot: [3,1] [==============>-----------------------------------] 31% est: 1s
## plot: [3,2] [===============>----------------------------------] 33% est: 1s
## plot: [3,3] [================>---------------------------------] 35% est: 1s
## plot: [3,4] [=================>--------------------------------] 37% est: 1s
## plot: [3,5] [==================>-------------------------------] 39% est: 1s
## plot: [3,6] [===================>------------------------------] 41% est: 1s
## plot: [3,7] [====================>-----------------------------] 43% est: 1s
## plot: [4,1] [=====================>----------------------------] 45% est: 1s
## plot: [4,2] [======================>---------------------------] 47% est: 1s
## plot: [4,3] [=======================>--------------------------] 49% est: 1s
## plot: [4,4] [=========================>------------------------] 51% est: 1s
## plot: [4,5] [==========================>-----------------------] 53% est: 1s
## plot: [4,6] [===========================>----------------------] 55% est: 1s
## plot: [4,7] [============================>---------------------] 57% est: 1s
## plot: [5,1] [=============================>--------------------] 59% est: 1s
## plot: [5,2] [==============================>-------------------] 61% est: 1s
## plot: [5,3] [===============================>------------------] 63% est: 1s
## plot: [5,4] [================================>-----------------] 65% est: 1s
## plot: [5,5] [=================================>----------------] 67% est: 1s
## plot: [5,6] [==================================>---------------] 69% est: 1s
## plot: [5,7] [===================================>--------------] 71% est: 1s
## plot: [6,1] [====================================>-------------] 73% est: 1s
## plot: [6,2] [=====================================>------------] 76% est: 1s
## plot: [6,3] [======================================>-----------] 78% est: 0s
## plot: [6,4] [=======================================>----------] 80% est: 0s
## plot: [6,5] [========================================>---------] 82% est: 0s
## plot: [6,6] [=========================================>--------] 84% est: 0s
## plot: [6,7] [==========================================>-------] 86% est: 0s
## plot: [7,1] [===========================================>------] 88% est: 0s
## plot: [7,2] [============================================>-----] 90% est: 0s
## plot: [7,3] [=============================================>----] 92% est: 0s
## plot: [7,4] [==============================================>---] 94% est: 0s
## plot: [7,5] [===============================================>--] 96% est: 0s
## plot: [7,6] [================================================>-] 98% est: 0s
## plot: [7,7] [==================================================]100% est: 0s
dev.off()
## X11cairo 
##        2

12.4 Unsupervised comparison

12.4.1 Neurons

if (cellnum == 7) {
  ctp_pheno_sv_neuron <- data.frame(ctp_pheno[,c("h_NeuN_pos", "hseq_Neuron", "hseq_Exc", "hseq_Inh", "sSV1", "sSV2", "sSV3", "sSV4", "sSV5", "sSV6", "sSV7", "sSV8")])
} else if (cellnum == 9) {
  ctp_pheno_sv_neuron <- data.frame(ctp_pheno[,c("h_NeuN_pos", "Neuron", "Exc_upper", "Exc_lower", "Inh_MGE", "Inh_CGE", "sSV1", "sSV2", "sSV3", "sSV4", "sSV5", "sSV6", "sSV7", "sSV8")])
}

ggpairs(ctp_pheno_sv_neuron)

12.4.2 Glial

ctp_pheno_sv_glial <- data.frame(ctp_pheno[,c("h_NeuN_neg", "hseq_Glial", "hseq_Astro", "hseq_Endo", "hseq_Micro", "hseq_Oligo", "hseq_OPC", "sSV1", "sSV2", "sSV3", "sSV4", "sSV5", "sSV6", "sSV7", "sSV8")])

ggpairs(ctp_pheno_sv_glial)

13 For presentations

# For presentations
ggplot(neunn, aes(x = sSV1, y = hseq_Oligo)) + geom_point() + geom_smooth(method = "lm") + xlab("smartSV1 (reference-free)") + ylab("Houseman (seq) Oligo proportion")
## `geom_smooth()` using formula 'y ~ x'

ggsave(paste(out_dir, "/hseqOligo_vs_smartSV1.png", sep = ""), height = 4, width = 4)
## `geom_smooth()` using formula 'y ~ x'
ggplot(neunn, aes(x = h_NeuN_neg, y = hseq_Glial)) + geom_point() + geom_smooth(method = "lm") + xlab("NeuN_neg proportion (reference-based)") + ylab("Sum of Houseman (seq) Glial proportions")
## `geom_smooth()` using formula 'y ~ x'

ggsave(paste(out_dir, "/NeuN_neg_vs_hseqGlial.png", sep = ""), height = 4, width = 4)
## `geom_smooth()` using formula 'y ~ x'